In [None]:
# sentiment_models.py
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from textblob import TextBlob
import numpy as np

def train_naive_bayes(X_train, y_train, alpha=1.0):
    """Fit a multinomial Naive Bayes classifier and return it.

    Args:
        X_train: Training feature matrix, passed straight to ``fit``.
        y_train: Training labels, passed straight to ``fit``.
        alpha: Forwarded to ``MultinomialNB(alpha=...)``.

    Returns:
        The fitted ``MultinomialNB`` estimator.
    """
    # scikit-learn's ``fit`` returns the estimator itself, so this hands back
    # the same fitted object a separate fit-then-return would.
    return MultinomialNB(alpha=alpha).fit(X_train, y_train)

def train_logistic_regression(X_train, y_train, C=1.0, solver='liblinear', random_state=42):
    """Fit a logistic-regression classifier and return it.

    Args:
        X_train: Training feature matrix, passed straight to ``fit``.
        y_train: Training labels, passed straight to ``fit``.
        C: Forwarded to ``LogisticRegression(C=...)``.
        solver: Forwarded to ``LogisticRegression(solver=...)``.
        random_state: Forwarded to ``LogisticRegression(random_state=...)``.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    classifier = LogisticRegression(
        C=C,
        solver=solver,
        random_state=random_state,
        max_iter=1000,  # fixed iteration cap; not exposed to callers
    )
    return classifier.fit(X_train, y_train)

def train_random_forest(X_train, y_train, n_estimators=100, random_state=42):
    """Fit a random-forest classifier and return it.

    Args:
        X_train: Training feature matrix, passed straight to ``fit``.
        y_train: Training labels, passed straight to ``fit``.
        n_estimators: Forwarded to ``RandomForestClassifier(n_estimators=...)``.
        random_state: Forwarded to ``RandomForestClassifier(random_state=...)``.

    Returns:
        The fitted ``RandomForestClassifier`` estimator.
    """
    forest = RandomForestClassifier(
        n_estimators=n_estimators,
        random_state=random_state,
    )
    return forest.fit(X_train, y_train)

def train_svm(X_train, y_train, C=1.0, kernel='rbf', random_state=42):
    """Fit a support-vector classifier and return it.

    Args:
        X_train: Training feature matrix, passed straight to ``fit``.
        y_train: Training labels, passed straight to ``fit``.
        C: Forwarded to ``SVC(C=...)``.
        kernel: Forwarded to ``SVC(kernel=...)``.
        random_state: Forwarded to ``SVC(random_state=...)``.

    Returns:
        The fitted ``SVC`` estimator.
    """
    svc_params = {"C": C, "kernel": kernel, "random_state": random_state}
    classifier = SVC(**svc_params)
    return classifier.fit(X_train, y_train)

def textblob_sentiment(text):
    """Classify ``text`` by the sign of its TextBlob polarity score.

    Args:
        text: The text handed to ``TextBlob``.

    Returns:
        ``"positive"`` when the polarity is above zero, ``"negative"`` when it
        is below zero, and ``"neutral"`` otherwise.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return "positive"
    if polarity < 0:
        return "negative"
    return "neutral"


def grid_search(model_type, X_train, y_train, param_grid):
    """Tune a classifier's hyperparameters with an exhaustive grid search.

    Args:
        model_type: One of ``"naive_bayes"``, ``"logistic_regression"``,
            ``"random_forest"`` or ``"svm"``.
        X_train: Training feature matrix, passed to ``GridSearchCV.fit``.
        y_train: Training labels, passed to ``GridSearchCV.fit``.
        param_grid: Parameter grid handed to ``GridSearchCV``.

    Returns:
        A ``(best_estimator, best_params)`` tuple taken from the fitted search.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Factories are lazy so that only the requested estimator is instantiated.
    supported = (
        ("naive_bayes", lambda: MultinomialNB()),
        ("logistic_regression", lambda: LogisticRegression(max_iter=1000)),
        ("random_forest", lambda: RandomForestClassifier(random_state=42)),
        ("svm", lambda: SVC(random_state=42)),
    )

    for name, build_estimator in supported:
        if model_type == name:
            estimator = build_estimator()
            break
    else:
        raise ValueError(f"Unsupported model type {model_type} for hyperparameter tuning")

    # 5-fold cross-validation scored on accuracy, using every available core.
    search = GridSearchCV(estimator, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
    search.fit(X_train, y_train)
    return search.best_estimator_, search.best_params_


def evaluate_model(model, X_test, y_test):
    """Score a fitted model on a held-out split.

    Args:
        model: Any object exposing ``predict(X_test)``.
        X_test: Test feature matrix passed to ``model.predict``.
        y_test: True labels the predictions are compared against.

    Returns:
        An ``(accuracy, report)`` tuple: the ``accuracy_score`` result and the
        ``classification_report`` output for the predictions.
    """
    predictions = model.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
    )


def _train_and_report(label, train_fn, X_train, y_train, X_test, y_test):
    """Train one classifier, evaluate it on the test split and print the results.

    Args:
        label: Human-readable name printed in front of the results,
            e.g. ``"Naive Bayes (TF-IDF)"``.
        train_fn: Callable taking ``(X_train, y_train)`` and returning a fitted model.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.
    """
    try:
        model = train_fn(X_train, y_train)
        accuracy, report = evaluate_model(model, X_test, y_test)
    except ValueError as e:
        # A model that rejects its input must not abort the remaining runs.
        # NOTE(review): MultinomialNB raises ValueError on negative feature
        # values, which embedding features presumably contain - confirm against
        # feature_engineering.
        print(f"{label} could not be trained or evaluated: {e}")
        return
    print(f"{label} Accuracy:", accuracy)
    print(f"{label} Report:\n", report)


def main():
    """Run every classifier against every feature representation and print the results.

    Loads ``reviews.csv``, drops rows with a missing ``text`` or ``sentiment``
    value, preprocesses the ``text`` column, builds TF-IDF, averaged-embedding
    and Word2Vec features, then trains and evaluates Naive Bayes, Logistic
    Regression, Random Forest and SVM on each feature set. Finishes with a
    TextBlob example. Errors are reported on stdout rather than raised.
    """
    try:
        import pandas as pd
        from feature_engineering import (
            create_tfidf_features,
            create_average_word_embeddings,
            split_data,
            create_word2vec_features,
        )
        from data_preprocessing import preprocess_dataframe

        data = pd.read_csv("reviews.csv")
        data.dropna(subset=["text", "sentiment"], inplace=True)
        processed_df = preprocess_dataframe(data, "text")
        texts = processed_df["processed_text"].tolist()
        labels = processed_df["sentiment"].tolist()

        # TF-IDF
        tfidf_matrix, _ = create_tfidf_features(texts)
        X_train_tfidf, X_test_tfidf, y_train_tfidf, y_test_tfidf = split_data(tfidf_matrix, labels)

        # Average Word Embeddings
        avg_embeddings = create_average_word_embeddings(texts)
        X_train_avg_emb, X_test_avg_emb, y_train_avg_emb, y_test_avg_emb = split_data(avg_embeddings, labels)

        # Word2Vec embeddings
        word2vec_embeddings, _ = create_word2vec_features(texts)
        X_train_w2v, X_test_w2v, y_train_w2v, y_test_w2v = split_data(word2vec_embeddings, labels)

        # Densify the TF-IDF splits once and reuse them for all four models
        # instead of converting again before every fit/predict call.
        feature_sets = [
            ("TF-IDF", X_train_tfidf.toarray(), y_train_tfidf, X_test_tfidf.toarray(), y_test_tfidf),
            ("Avg. Embeddings", X_train_avg_emb, y_train_avg_emb, X_test_avg_emb, y_test_avg_emb),
            ("Word2Vec", X_train_w2v, y_train_w2v, X_test_w2v, y_test_w2v),
        ]
        trainers = [
            ("Naive Bayes", train_naive_bayes),
            ("Logistic Regression", train_logistic_regression),
            ("Random Forest", train_random_forest),
            ("SVM", train_svm),
        ]

        # Same order as before: all models on TF-IDF, then on averaged
        # embeddings, then on Word2Vec.
        for feature_name, X_train, y_train, X_test, y_test in feature_sets:
            for model_name, train_fn in trainers:
                _train_and_report(
                    f"{model_name} ({feature_name})",
                    train_fn,
                    X_train,
                    y_train,
                    X_test,
                    y_test,
                )

        # Example of TextBlob usage
        example_text = "This is a fantastic product! I am very happy."
        textblob_result = textblob_sentiment(example_text)
        print(f"TextBlob sentiment of '{example_text}': {textblob_result}")
    except FileNotFoundError:
        print("Error: 'reviews.csv' not found. Please provide the data in a 'reviews.csv' file with 'text' and 'sentiment' columns.")
    except KeyError as e:
        print(f"Error: {e} column not found. Ensure your 'reviews.csv' has 'text' and 'sentiment' columns.")
    except Exception as e:
        # Top-level boundary of the script: report anything else instead of
        # surfacing a traceback.
        print(f"An unexpected error occurred: {e}")

# Run the training/evaluation pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()