In [None]:

1. Data Ingestion Pipeline:
   a. Design a data ingestion pipeline that collects and stores data from various sources such as databases, APIs, and streaming platforms.
   b. Implement a real-time data ingestion pipeline for processing sensor data from IoT devices.
   c. Develop a data ingestion pipeline that handles data from different file formats (CSV, JSON, etc.) and performs data validation and cleansing.



In [None]:
Sensor_ID,Value,Time
1,23.5,2023-07-18 12:00:00
2,18.9,2023-07-18 12:01:00
3,21.2,2023-07-18 12:02:00
import csv
import datetime

def validate_data(row):
    try:
        sensor_id = int(row['Sensor_ID'])
        value = float(row['Value'])
        time = datetime.datetime.strptime(row['Time'], '%Y-%m-%d %H:%M:%S')
        return True, {'Sensor_ID': sensor_id, 'Value': value, 'Time': time}
    except (ValueError, KeyError, ValueError) as e:
        return False, e

def ingest_data_from_csv(file_path):
    with open(file_path, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            is_valid, data = validate_data(row)
            if is_valid:
                # Here, you can store the data into your database or data store
                print("Ingesting data:", data)
            else:
                print("Invalid data row:", row, "Error:", data)

if __name__ == "__main__":
    file_path = "data.csv"
    ingest_data_from_csv(file_path)


In [None]:
2. Model Training:
   a. Build a machine learning model to predict customer churn based on a given dataset. Train the model using appropriate algorithms and evaluate its performance.
   b. Develop a model training pipeline that incorporates feature engineering techniques such as one-hot encoding, feature scaling, and dimensionality reduction.
   c. Train a deep learning model for image classification using transfer learning and fine-tuning techniques.


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Load the dataset
data = pd.read_csv('customer_churn_dataset.csv')

# Preprocessing: Encode categorical features
label_encoder = LabelEncoder()
data['Gender'] = label_encoder.fit_transform(data['Gender'])
data['Contract'] = label_encoder.fit_transform(data['Contract'])

# Split the dataset into features (X) and target (y)
X = data.drop('Churn', axis=1)
y = data['Churn']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model (Random Forest Classifier)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)
print("Confusion Matrix:")
print(conf_matrix)


In [None]:
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset
data = pd.read_csv('dataset.csv')

# Split the dataset into features (X) and target (y)
X = data.drop('target', axis=1)
y = data['target']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Engineering: Define transformations
numeric_features = ['Age', 'Income']
categorical_features = ['Gender', 'Education']

numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = FeatureUnion(transformer_list=[
    ('numeric', numeric_transformer, numeric_features),
    ('categorical', categorical_transformer, categorical_features)
])

# Build the model training pipeline
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('reduce_dim', PCA(n_components=2)),  # You can adjust the number of components as needed
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Train the model
pipeline.fit(X_train, y_train)

# Make predictions on the test set
y_pred = pipeline.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the pre-trained VGG16 model with imagenet weights
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pre-trained layers
for layer in base_model.layers:
    layer.trainable = False

# Add custom top layers for the classification task
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

# Create the new model for transfer learning
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Data augmentation and preprocessing
datagen = ImageDataGenerator(rescale=1.0/255.0, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)

# Load and preprocess the training and validation data
train_generator = datagen.flow_from_directory('train_dir', target_size=(224, 224), batch_size=batch_size, class_mode='categorical')
validation_generator = datagen.flow_from_directory('val_dir', target_size=(224, 224), batch_size=batch_size, class_mode='categorical')

# Train the model with fine-tuning
model.fit(train_generator, epochs=num_epochs, validation_data=validation_generator)

# Save the trained model
model.save('image_classification_model.h5')


In [None]:
3. Model Validation:
   a. Implement cross-validation to evaluate the performance of a regression model for predicting housing prices.
   b. Perform model validation using different evaluation metrics such as accuracy, precision, recall, and F1 score for a binary classification problem.
   c. Design a model validation strategy that incorporates stratified sampling to handle imbalanced datasets.


In [None]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

# Assuming you have your features (X) and target (y) data ready
# X: Feature matrix (shape: [n_samples, n_features])
# y: Target vector (shape: [n_samples, ])

# Create a regression model (e.g., Linear Regression)
model = LinearRegression()

# Perform k-fold cross-validation
k_folds = 5  # Choose the number of folds
scores = cross_val_score(model, X, y, cv=k_folds, scoring='neg_mean_squared_error')

# Calculate the average RMSE (Root Mean Squared Error) from the scores
mean_rmse = np.sqrt(-scores.mean())
print("Average RMSE:", mean_rmse)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Assuming you have your features (X) and binary target (y) data ready
# X: Feature matrix (shape: [n_samples, n_features])
# y: Binary target vector (shape: [n_samples, ])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a binary classification model (e.g., Logistic Regression)
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)
print("Confusion Matrix:")
print(conf_matrix)
from sklearn.model_selection import train_test_split

# Assuming you have your features (X) and binary target (y) data ready
# X: Feature matrix (shape: [n_samples, n_features])
# y: Binary target vector (shape: [n_samples, ])

# Perform stratified sampling for train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Now you can proceed with training your model and evaluating its performance as before


In [None]:
4. Deployment Strategy:
   a. Create a deployment strategy for a machine learning model that provides real-time recommendations based on user interactions.
   b. Develop a deployment pipeline that automates the process of deploying machine learning models to cloud platforms such as AWS or Azure.
   c. Design a monitoring and maintenance strategy for deployed models to ensure their performance and reliability over time.


In [8]:
# Flask Application for Real-Time Recommendations
from flask import Flask, request, jsonify
import model   # Import your trained recommendation model here

app = Flask(__name__)

@app.route('/recommend', methods=['POST'])
def recommend():
    try:
        data = request.get_json()
        user_id = data['user_id']
        user_interactions = data['interactions']

        # Process user interactions and call the model for recommendations
        recommendations = model.get_recommendations(user_id, user_interactions)

        return jsonify({'recommendations': recommendations})
    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)


ModuleNotFoundError: No module named 'flask'

In [9]:
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory to /app
WORKDIR /app

# Copy the current directory contents into the container at /app
COPY . /app

# Install the required dependencies
RUN pip install -r requirements.txt

# Define the command to run the Flask app on container startup
CMD ["python", "app.py"]


SyntaxError: invalid syntax (4111083209.py, line 2)

In [None]:
# Prometheus Metrics for Real-Time Recommendations
from prometheus_flask_exporter import PrometheusMetrics
from flask import Flask, request, jsonify
import model

app = Flask(__name__)
metrics = PrometheusMetrics(app)

@app.route('/recommend', methods=['POST'])
def recommend():
    try:
        data = request.get_json()
        user_id = data['user_id']
        user_interactions = data['interactions']

        # Process user interactions and call the model for recommendations
        recommendations = model.get_recommendations(user_id, user_interactions)

        # Increment recommendation counter for monitoring
        metrics.counter('recommendation_requests', 'Recommendation Requests Count', labels={'status': 'success'})

        return jsonify({'recommendations': recommendations})
    except Exception as e:
        # Increment recommendation counter and error counter for monitoring
        metrics.counter('recommendation_requests', 'Recommendation Requests Count', labels={'status': 'error'})
        metrics.counter('recommendation_errors', 'Recommendation Errors Count')
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
