In [None]:
# Answer 2 

'''
a. Build a machine learning model to predict customer churn based on a given dataset. Train the model using appropriate algorithms and
evaluate its performance.
'''
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the customer churn dataset
data = pd.read_csv("customer_churn.csv")

# Separate the features from the target. Passing the whole frame as the
# feature matrix would hand the model the "Churn" column it is supposed to
# predict (target leakage) and make the reported accuracy meaningless.
# NOTE(review): LogisticRegression needs numeric inputs; if the CSV contains
# string columns they must be encoded before fitting - confirm the schema.
X = data.drop(columns=["Churn"])
y = data["Churn"]

# Split the data into train and test sets (seeded so the result is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate the model's performance on the held-out rows
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


In [None]:
'''
b. Develop a model training pipeline that incorporates feature engineering techniques such as one-hot encoding, feature scaling, and dimensionality reduction.
'''

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA  # PCA lives in sklearn.decomposition, not sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the customer churn dataset
data = pd.read_csv("customer_churn.csv")

# Columns handled by each preprocessing step.
# NOTE(review): "Tenure" is one-hot encoded here as in the original cell; if it
# is a numeric month count it probably belongs with the scaled columns - confirm.
categorical_cols = ["Gender", "Tenure", "Contract"]
numerical_cols = ["MonthlyCharges", "TotalCharges"]

# Split BEFORE fitting any transformer so that the encoder, scaler and PCA
# only ever learn from the training rows (no test-set leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    data[categorical_cols + numerical_cols],
    data["Churn"],
    test_size=0.25,
    random_state=42,
)

# One-hot encode the categorical features. Categories that appear only in the
# test rows are encoded as all zeros instead of raising. The encoder returns a
# sparse matrix, so it is densified before being stacked with NumPy.
encoder = OneHotEncoder(handle_unknown="ignore")
X_cat_train = encoder.fit_transform(X_train_raw[categorical_cols]).toarray()
X_cat_test = encoder.transform(X_test_raw[categorical_cols]).toarray()

# Scale the numerical features
scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_train_raw[numerical_cols])
X_num_test = scaler.transform(X_test_raw[numerical_cols])

# Combine the categorical and numerical features
X_train = np.concatenate([X_cat_train, X_num_train], axis=1)
X_test = np.concatenate([X_cat_test, X_num_test], axis=1)

# Reduce the dimensionality of the data using PCA. The component count is
# capped because PCA rejects more components than samples or features.
n_components = min(10, X_train.shape[0], X_train.shape[1])
pca = PCA(n_components=n_components)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate the model's performance
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


In [None]:
'''
c. Train a deep learning model for image classification using transfer learning and fine-tuning techniques.
'''

import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D

# Load the VGG16 convolutional base (ImageNet weights, no classifier head)
vgg16 = VGG16(weights="imagenet", include_top=False)

# Freeze the VGG16 model so only the new head is trained
for layer in vgg16.layers:
    layer.trainable = False

# Add a new classification head on top of the VGG16 model.
# With include_top=False the base outputs a 4-D feature map; it has to be
# pooled to one vector per image, otherwise the Dense layers act on every
# spatial position and the output never becomes (batch, 10).
model = Sequential()
model.add(vgg16)
model.add(GlobalAveragePooling2D())
model.add(Dense(1000, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation="softmax"))

# Compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# NOTE(review): X_train / y_train / X_test / y_test are not defined in this
# cell. They must be image batches with 3 channels and one-hot labels over 10
# classes; variables left in the kernel by an earlier (tabular) cell will not
# work here - load the image dataset before running this.

# Train the model
model.fit(X_train, y_train, epochs=10)

# Evaluate the model's performance
model.evaluate(X_test, y_test)


In [None]:
# Answer 1


'''
a. Design a data ingestion pipeline that collects and stores data from various sources such as databases, APIs, and streaming platforms.
'''
import requests
import json
import pandas as pd

# Define the API endpoint
endpoint = "https://api.example.com/data"

# Make the API request; the timeout stops the cell from hanging forever on an
# unresponsive server
response = requests.get(endpoint, timeout=30)

# Check the response status code. Stop here on failure: continuing would
# either hit an undefined `data` or silently store a stale `data` left in the
# kernel by an earlier cell.
if response.status_code != 200:
    raise RuntimeError(
        f"Error: request to {endpoint} failed with status {response.status_code}"
    )

# The request was successful
data = json.loads(response.content)

# Store the data in a Pandas DataFrame
df = pd.DataFrame(data)

# Save the DataFrame to a CSV file
df.to_csv("data.csv")


In [None]:
'''
b. Implement a real-time data ingestion pipeline for processing sensor data from IoT devices.
'''
import paho.mqtt.client as mqtt

# Define the callback functions
def on_connect(client, userdata, flags, rc):
    """Subscribe to the sensor topic once the broker accepts the connection.

    Subscribing here rather than once at start-up means the subscription is
    re-established whenever the client reconnects.
    NOTE(review): this is the paho-mqtt callback API v1 signature, matching the
    argument-less `mqtt.Client()` constructor used below - confirm the
    installed paho-mqtt version.
    """
    if rc == 0:
        client.subscribe("sensor/data")
    else:
        print("Connection failed with result code:", rc)


def on_message(client, userdata, message):
    """Print the payload of every message received on a subscribed topic."""
    # errors="replace" keeps a malformed (non-UTF-8) payload from raising
    # inside the network loop callback.
    print(message.payload.decode(errors="replace"))


# Create a client
client = mqtt.Client()

# Register the callback functions before connecting so no event is missed
client.on_connect = on_connect
client.on_message = on_message

# Connect to the broker
client.connect("localhost", 1883)

# Start listening for messages (blocks until the client disconnects)
client.loop_forever()


In [None]:
'''
c. Develop a data ingestion pipeline that handles data from different file formats (CSV, JSON, etc.) and performs data validation and cleansing.

'''
import csv
import json
import pandas as pd

# Define the file path and the number of fields every record must contain
filepath = "data.csv"
EXPECTED_FIELDS = 3

# Read the data from the file. newline="" is what the csv module expects so
# that quoted fields containing line breaks are parsed correctly; completely
# blank lines are skipped instead of being treated as invalid records.
with open(filepath, "r", newline="") as f:
    data = [row for row in csv.reader(f) if row]

# Validate the data: every record must have exactly EXPECTED_FIELDS fields
for record_number, row in enumerate(data, start=1):
    if len(row) != EXPECTED_FIELDS:
        raise ValueError(
            f"Invalid row {record_number}: expected {EXPECTED_FIELDS} fields, "
            f"got {len(row)}: {row}"
        )

# Cleanse the data: strip surrounding whitespace from every field
data = [[field.strip() for field in row] for row in data]

# Store the data in a Pandas DataFrame
df = pd.DataFrame(data)

# Save the DataFrame to a JSON file
df.to_json("data.json")


In [None]:
# Answer 3

'''
a. Implement cross-validation to evaluate the performance of a regression model for predicting housing prices.

'''
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# Read the comma-separated housing table into a 2-D float array
data = np.loadtxt("housing_prices.csv", delimiter=",")

# Every column but the last is a feature; the last column is the label
X, y = data[:, :-1], data[:, -1]

# Plain linear regression, scored with 10-fold cross-validation
model = LinearRegression()
scores = cross_val_score(estimator=model, X=X, y=y, cv=10)

# Show the score obtained on each fold
print(scores)


In [None]:
'''
b. Perform model validation using different evaluation metrics such as accuracy, precision, recall, and F1 score for a binary classification problem.

'''
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the comma-separated dataset into a 2-D float array
data = np.loadtxt("binary_classification.csv", delimiter=",")

# Every column but the last is a feature; the last column is the label
X, y = data[:, :-1], data[:, -1]

# Hold out a quarter of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Fit a logistic regression model and predict the held-out labels
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Compute accuracy, precision, recall and F1 against the true test labels
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report each metric on its own line
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 score:", f1),
):
    print(label, value)


In [None]:
'''
c. Design a model validation strategy that incorporates stratified sampling to handle imbalanced datasets.

'''
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the imbalanced dataset
data = np.loadtxt("imbalanced_dataset.csv", delimiter=",")

# Split the data into features and labels. The label column must not stay in
# the feature matrix, otherwise the model is trained on the answer.
X = data[:, :-1]
y = data[:, -1]

# Count the number of samples for each class.
# np.unique(..., return_counts=True) returns a (values, counts) pair.
classes, class_counts = np.unique(y, return_counts=True)

# Calculate the class imbalance ratio (largest class size / smallest class size)
imbalance_ratio = class_counts.max() / class_counts.min()

# Stratified split: this does not balance the dataset, it keeps the class
# proportions identical in the train and test sets so the minority class is
# represented in both.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y
)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate the model on the test set
y_pred = model.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print("Accuracy:", accuracy)


In [None]:
# Answer 4
'''
a. Create a deployment strategy for a machine learning model that provides real-time recommendations based on user interactions.

'''
import boto3
import json

# Define the AWS Lambda function
def recommender(event, context):
    """Return the products found in the user's interactions.

    Args:
        event: Mapping with an "interactions" entry. The entry may be a JSON
            string (or bytes) encoding a list of interaction objects, or an
            already-decoded list. Each interaction must have a "product" key.
            A missing "interactions" entry is treated as no interactions.
        context: Lambda context object; not used.

    Returns:
        dict: {"recommendations": [...]} with one product per interaction, in
        input order.

    Raises:
        KeyError: If an interaction has no "product" key.
        json.JSONDecodeError: If "interactions" is a string that is not valid JSON.
    """
    # Get the user's interactions, decoding only when they arrive serialized
    raw_interactions = event.get("interactions", [])
    if isinstance(raw_interactions, (str, bytes, bytearray)):
        interactions = json.loads(raw_interactions)
    else:
        interactions = raw_interactions

    # Make recommendations: one entry per interaction, preserving order
    recommendations = [interaction["product"] for interaction in interactions]

    # Return the recommendations
    return {"recommendations": recommendations}

# Deploy the Lambda function
import io
import os
import zipfile

# create_function expects the code as a zip archive passed in a
# {"ZipFile": bytes} mapping, so package recommender.py in memory.
package = io.BytesIO()
with zipfile.ZipFile(package, "w", zipfile.ZIP_DEFLATED) as archive:
    archive.write("recommender.py")

lambda_client = boto3.client("lambda")
lambda_client.create_function(
    FunctionName="recommender",
    # NOTE(review): confirm python3.8 is still an accepted Lambda runtime.
    Runtime="python3.8",
    # Role is a required parameter; the execution role ARN is read from the
    # environment so no account-specific value is hard-coded in the notebook.
    Role=os.environ["LAMBDA_ROLE_ARN"],
    # Handler is "<module>.<function>"; the function defined in this cell is
    # called `recommender`, not `lambda_handler`.
    Handler="recommender.recommender",
    Code={"ZipFile": package.getvalue()},
)


In [None]:
'''
b. Develop a deployment pipeline that automates the process of deploying machine learning models to cloud platforms such as AWS or Azure.

'''
import argparse
import os
import subprocess

def deploy_model(model_path, cloud_platform):
    """Deploy the model at `model_path` by invoking the platform's CLI.

    Args:
        model_path: Path passed to the CLI via its `-f` option.
        cloud_platform: Either "aws" or "azure".

    Raises:
        ValueError: If `cloud_platform` is not one of the supported values.
        subprocess.CalledProcessError: If the CLI exits with a non-zero status.
        FileNotFoundError: If the CLI executable is not installed.
    """
    # NOTE(review): `aws lambda deploy` does not appear to be an AWS CLI
    # subcommand, and the `az ml model deploy` flags should be checked against
    # the installed Azure CLI version - confirm the intended commands.
    commands = {
        "aws": ["aws", "lambda", "deploy", "-f", model_path, "-t", "python3.8"],
        "azure": ["az", "ml", "model", "deploy", "-f", model_path],
    }
    if cloud_platform not in commands:
        # Fail loudly instead of returning as if a deployment had happened
        raise ValueError(f"Unsupported cloud platform: {cloud_platform!r}")

    # check=True turns a failed deployment into an exception instead of
    # silently ignoring the CLI's exit status
    subprocess.run(commands[cloud_platform], check=True)

if __name__ == "__main__":
    # Command-line entry point: both options are mandatory, and the platform
    # is restricted to the values deploy_model understands.
    cli = argparse.ArgumentParser()
    for flag, options in (
        ("--model_path", {"required": True}),
        ("--cloud_platform", {"choices": ["aws", "azure"], "required": True}),
    ):
        cli.add_argument(flag, **options)
    parsed = cli.parse_args()
    deploy_model(model_path=parsed.model_path, cloud_platform=parsed.cloud_platform)


In [None]:
'''
c. Design a monitoring and maintenance strategy for deployed models to ensure their performance and reliability over time.

'''
import time
from datetime import datetime, timedelta, timezone

import boto3
import json

# NOTE(review): the namespace (and any dimensions) the metrics are published
# under is not stated anywhere in this notebook. "AWS/Lambda" is assumed here
# because the model is deployed as a Lambda function; that namespace reports
# "Duration" rather than "Latency" - confirm namespace and metric names.
NAMESPACE = "AWS/Lambda"
PERIOD_SECONDS = 300         # aggregation window of each datapoint
POLL_INTERVAL_SECONDS = 60   # pause between polls so the loop does not hammer the API

# Define the AWS CloudWatch metrics
metrics = [
    "Latency",
    "Errors",
    "Invocations",
]

# Define the AWS CloudWatch alarms
alarms = [
    {
        "MetricName": "Latency",
        "Threshold": 100,
        "ComparisonOperator": "GreaterThanThreshold",
        "Statistic": "Average",
    },
    {
        "MetricName": "Errors",
        "Threshold": 10,
        "ComparisonOperator": "GreaterThanThreshold",
        "Statistic": "Sum",
    },
]

# Create the CloudWatch alarms. The client method is put_metric_alarm, and a
# metric alarm needs a name, namespace, statistic, period and evaluation
# period count in addition to the threshold and comparison operator.
cloudwatch_client = boto3.client("cloudwatch")
for alarm in alarms:
    cloudwatch_client.put_metric_alarm(
        AlarmName=f"recommender-{alarm['MetricName']}",
        Namespace=NAMESPACE,
        MetricName=alarm["MetricName"],
        Statistic=alarm["Statistic"],
        Period=PERIOD_SECONDS,
        EvaluationPeriods=1,
        Threshold=alarm["Threshold"],
        ComparisonOperator=alarm["ComparisonOperator"],
    )

# Monitor the model (runs until the cell is interrupted)
while True:
    end_time = datetime.now(timezone.utc)
    start_time = end_time - timedelta(days=1)

    # Check the alarms against the most recent datapoint of each metric
    for alarm in alarms:
        stats = cloudwatch_client.get_metric_statistics(
            Namespace=NAMESPACE,
            MetricName=alarm["MetricName"],
            StartTime=start_time,
            EndTime=end_time,
            Period=PERIOD_SECONDS,
            Statistics=[alarm["Statistic"]],
        )
        datapoints = stats["Datapoints"]
        if not datapoints:
            # Nothing reported in the window; nothing to compare
            continue

        # Datapoints are not guaranteed to arrive in time order
        latest = max(datapoints, key=lambda point: point["Timestamp"])
        if latest[alarm["Statistic"]] > alarm["Threshold"]:
            print("Alarm triggered:", alarm["MetricName"])

    time.sleep(POLL_INTERVAL_SECONDS)
