In [10]:
import os
import shutil

import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the review dataset and pull out the target column.
df = pd.read_csv('cleaned_amazon_reviews.csv')
y = df['Sentiment']

# Turn the review text into a sparse TF-IDF matrix, keeping at most 5000 terms.
# NOTE(review): the vectorizer is fitted on every row before the train/test
# split happens, so test-set text influences the vocabulary and IDF weights.
tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(df['Cleaned_Review'])

In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)


LogisticRegression

In [4]:
# Run settings recorded with MLflow in the model cells below.
# `epochs` is the number of fit/evaluate passes each model cell performs;
# `learning_rate` is only logged in the visible cells, not passed to a model.
learning_rate, epochs = 0.002, 5

In [5]:
mlflow.set_experiment("Exp_new3")

# One MLflow run: record the settings, fit/evaluate `epochs` times while
# logging test accuracy per pass, then store the fitted model with the run.
with mlflow.start_run():
    # Log hyperparameters.
    # NOTE(review): `learning_rate` is recorded for tracking only; it is never
    # passed to LogisticRegression below.
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("epochs", epochs)

    # Initialize the model.
    # NOTE(review): scikit-learn documents warm_start as having no effect with
    # the liblinear solver, so every pass of the loop is a full refit and the
    # identical per-epoch accuracy is expected -- confirm the loop is wanted.
    model = LogisticRegression(solver='liblinear', max_iter=100, warm_start=True)

    for epoch in range(epochs):
        model.fit(X_train, y_train)

        # Predict on the held-out split and calculate accuracy
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Log the accuracy as a metric, one point per pass
        mlflow.log_metric("accuracy", accuracy, step=epoch)
        print(f"Epoch {epoch+1}/{epochs} - Accuracy: {accuracy:.4f}")

    # Save the model locally. save_model writes a directory (not a single
    # pickle file) and raises if that directory already has content, so clear
    # any copy left by a previous execution to keep the cell re-runnable.
    model_filename = "logistic_regression_modelv8.pkl"
    if os.path.isdir(model_filename):
        shutil.rmtree(model_filename)
    mlflow.sklearn.save_model(model, model_filename)

    # Attach the saved model directory to the run as an artifact
    mlflow.log_artifact(model_filename)


2024/10/15 17:26:30 INFO mlflow.tracking.fluent: Experiment with name 'Exp_new3' does not exist. Creating a new experiment.


Epoch 1/5 - Accuracy: 0.8423
Epoch 2/5 - Accuracy: 0.8423
Epoch 3/5 - Accuracy: 0.8423
Epoch 4/5 - Accuracy: 0.8423
Epoch 5/5 - Accuracy: 0.8423


SVC

In [6]:
# SVC settings, passed to SVC(C=..., kernel=...) and logged in the next cell
kernel = 'linear'
C = 1.0

In [13]:
mlflow.set_experiment("Exp_new3")

# One MLflow run: record the settings, fit/evaluate `epochs` times while
# logging test accuracy per pass, then store the fitted model with the run.
with mlflow.start_run():
    # Log hyperparameters
    mlflow.log_param("C", C)
    mlflow.log_param("kernel", kernel)
    mlflow.log_param("epochs", epochs)

    # Initialize the SVM model.
    # random_state seeds the data shuffling scikit-learn performs for the
    # probability estimates (probability=True), so the saved model's
    # predict_proba is reproducible across runs.
    # NOTE(review): max_iter=100 caps the solver, and the notebook's global
    # warnings filter would hide any convergence warning -- confirm the cap is
    # intentional.
    model = SVC(C=C, kernel=kernel, max_iter=100, probability=True, random_state=42)

    for epoch in range(epochs):
        model.fit(X_train, y_train)

        # Predict on the held-out split and calculate accuracy
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Log the accuracy as a metric, one point per pass
        mlflow.log_metric("accuracy", accuracy, step=epoch)
        print(f"Epoch {epoch+1}/{epochs} - Accuracy: {accuracy:.4f}")

    # Save the model locally. save_model writes a directory (not a single
    # pickle file) and raises if that directory already has content, so clear
    # any copy left by a previous execution to keep the cell re-runnable.
    model_filename = "svm_modelv9.pkl"
    if os.path.isdir(model_filename):
        shutil.rmtree(model_filename)
    mlflow.sklearn.save_model(model, model_filename)

    # Attach the saved model directory to the run as an artifact
    mlflow.log_artifact(model_filename)


Epoch 1/5 - Accuracy: 0.6428
Epoch 2/5 - Accuracy: 0.6428
Epoch 3/5 - Accuracy: 0.6428
Epoch 4/5 - Accuracy: 0.6428
Epoch 5/5 - Accuracy: 0.6428


RandomForestClassifier


In [12]:
# RandomForestClassifier settings, passed to the estimator and logged in the next cell
n_estimators, max_depth = 200, 10

In [14]:
mlflow.set_experiment("Exp_new3")

# One MLflow run: record the settings, fit/evaluate `epochs` times while
# logging test accuracy per pass, then store the last fitted model with the run.
with mlflow.start_run():
    # Log hyperparameters, including the seed the per-pass seeds derive from
    base_seed = 42
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("base_seed", base_seed)

    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)

    for epoch in range(epochs):
        # warm_start is not enabled, so each fit() builds a new forest. Give
        # every pass its own fixed seed: the passes still differ from one
        # another, but a re-run of the notebook reproduces the same numbers
        # and the same saved model.
        model.set_params(random_state=base_seed + epoch)
        model.fit(X_train, y_train)

        # Predict on the held-out split and calculate accuracy
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Log the accuracy as a metric, one point per pass
        mlflow.log_metric("accuracy", accuracy, step=epoch)
        print(f"Epoch {epoch+1}/{epochs} - Accuracy: {accuracy:.4f}")

    # Save the model locally. save_model writes a directory (not a single
    # pickle file) and raises if that directory already has content, so clear
    # any copy left by a previous execution to keep the cell re-runnable.
    model_filename = "random_forest_model.pkl"
    if os.path.isdir(model_filename):
        shutil.rmtree(model_filename)
    mlflow.sklearn.save_model(model, model_filename)

    # Attach the saved model directory to the run as an artifact
    mlflow.log_artifact(model_filename)


Epoch 1/5 - Accuracy: 0.7967
Epoch 2/5 - Accuracy: 0.7977
Epoch 3/5 - Accuracy: 0.8023
Epoch 4/5 - Accuracy: 0.7943
Epoch 5/5 - Accuracy: 0.8030


Decision Tree

In [15]:
# DecisionTreeClassifier settings, passed to the estimator and logged in the next cell
max_depth, min_samples_split = 10, 4

In [16]:
mlflow.set_experiment("Exp_new3")

# One MLflow run: record the settings, fit/evaluate `epochs` times while
# logging test accuracy per pass, then store the last fitted model with the run.
with mlflow.start_run():
    # Log hyperparameters, including the seed the per-pass seeds derive from
    base_seed = 42
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("min_samples_split", min_samples_split)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("base_seed", base_seed)

    # Initialize the DecisionTree model
    model = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split)

    for epoch in range(epochs):
        # Each fit() builds a new tree, and without a seed the result varies
        # from run to run. Give every pass its own fixed seed: the passes
        # still differ from one another, but a re-run of the notebook
        # reproduces the same numbers and the same saved model.
        model.set_params(random_state=base_seed + epoch)
        model.fit(X_train, y_train)

        # Predict on the held-out split and calculate accuracy
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Log the accuracy as a metric, one point per pass
        mlflow.log_metric("accuracy", accuracy, step=epoch)
        print(f"Epoch {epoch+1}/{epochs} - Accuracy: {accuracy:.4f}")

    # Save the model locally. save_model writes a directory (not a single
    # pickle file) and raises if that directory already has content, so clear
    # any copy left by a previous execution to keep the cell re-runnable.
    model_filename = "decision_tree_model.pkl"
    if os.path.isdir(model_filename):
        shutil.rmtree(model_filename)
    mlflow.sklearn.save_model(model, model_filename)

    # Attach the saved model directory to the run as an artifact
    mlflow.log_artifact(model_filename)


Epoch 1/5 - Accuracy: 0.6765
Epoch 2/5 - Accuracy: 0.6767
Epoch 3/5 - Accuracy: 0.6775
Epoch 4/5 - Accuracy: 0.6763
Epoch 5/5 - Accuracy: 0.6767
