<a href="https://colab.research.google.com/github/Najia1809/sentiment-analysis/blob/main/modeltrainingFinal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install & Import Required Libraries

In [None]:
# 📦 Install necessary libraries
!pip install --upgrade gspread gspread_dataframe google-auth scikit-learn

# 📚 Basic Libraries
import pandas as pd
import numpy as np
import re
import nltk
import string
import matplotlib.pyplot as plt

# 📊 Sklearn Modules
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

# 🔐 Google Colab & Drive
from google.colab import drive
from google.colab import auth
import gspread
from google.auth import default
from gspread_dataframe import set_with_dataframe, get_as_dataframe
from google.colab import files


Collecting google-auth
  Downloading google_auth-2.40.1-py2.py3-none-any.whl.metadata (6.2 kB)
Downloading google_auth-2.40.1-py2.py3-none-any.whl (216 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m216.1/216.1 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-auth
  Attempting uninstall: google-auth
    Found existing installation: google-auth 2.38.0
    Uninstalling google-auth-2.38.0:
      Successfully uninstalled google-auth-2.38.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires google-auth==2.38.0, but you have google-auth 2.40.1 which is incompatible.[0m[31m
[0mSuccessfully installed google-auth-2.40.1


# Mount Google Drive & Authenticate Google Sheets

In [None]:
# Mount Google Drive under /content/drive
drive.mount('/content/drive')

# Authenticate the Colab user, take the resulting default credentials and
# build the gspread client `gc` that the following cells use to open and
# create spreadsheets.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)


Mounted at /content/drive


# Load Data from Google Sheets

In [None]:
# Open the source spreadsheet by title and take its first worksheet.
# NOTE(review): the title starts with a space — confirm this matches the real
# spreadsheet title exactly, since gc.open looks the sheet up by this name.
spreadsheet_name = " sentiment_spreadsheet"
spreadsheet = gc.open(spreadsheet_name)
worksheet = spreadsheet.sheet1

# Load every record of the worksheet into a DataFrame; later cells read its
# 'Sentence' and 'Sentiment' columns.
df = pd.DataFrame(worksheet.get_all_records())


# Preview the first rows
print(df.head())


                                          Sentence Sentiment
0  The team did an outstanding job on the project.  Positive
1               I’m so proud of how far I’ve come.  Positive
2        This is the best day I’ve had in a while.  Positive
3            Everything went perfectly as planned.  Positive
4              She has a fantastic sense of humor.  Positive


In [None]:
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

# Text Preprocessing Function

In [None]:
import re
import string
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import nltk
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def clean_text(text):
    """Turn a raw sentence into a space-joined string of stemmed tokens.

    Steps, in order: lowercase, remove URLs, remove @mentions (the whole
    handle) and '#' signs (the hashtag word itself is kept), remove every
    character that is not a-z or whitespace, split on whitespace, drop
    English stop words and stem the remaining tokens.

    Args:
        text: The raw sentence. Non-string values (for example an empty or
            numeric spreadsheet cell) are treated as empty text.

    Returns:
        The cleaned tokens joined by single spaces, or '' when nothing is left.
    """
    if not isinstance(text, str):
        return ''
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)  # remove URLs
    # '@\w+' removes the whole handle. The earlier pattern '\@w+' only matched
    # '@' followed by literal 'w' characters, so handles leaked into the tokens.
    text = re.sub(r'@\w+|#', '', text)  # remove mentions and hashtag signs
    # Only a-z and whitespace survive this step, which already covers all
    # punctuation, so no separate punctuation pass is needed afterwards.
    text = re.sub(r'[^a-z\s]', '', text)
    tokens = text.split()
    tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)



[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Preprocess the Sentiment Data and Upload It to a New Google Sheet

In [None]:
# Install required packages if not already installed
!pip install --upgrade gspread gspread_dataframe google-auth

# Import libraries
import re
import pandas as pd
import nltk
from nltk.corpus import stopwords
import gspread
from google.auth import default
from gspread_dataframe import set_with_dataframe

# Download NLTK stopwords
nltk.download('stopwords')

# Authenticate with Google Sheets
creds, _ = default()
gc = gspread.authorize(creds)

# Load your Google Spreadsheet
spreadsheet_name = ' sentiment_spreadsheet'  # Changed to your new name
spreadsheet = gc.open(spreadsheet_name)
worksheet = spreadsheet.sheet1

# Load data from the spreadsheet into a DataFrame
df = pd.DataFrame(worksheet.get_all_records())

import re
import string
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import nltk
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def clean_text(text):
    """Turn a raw sentence into a space-joined string of stemmed tokens.

    Steps, in order: lowercase, remove URLs, remove @mentions (the whole
    handle) and '#' signs (the hashtag word itself is kept), remove every
    character that is not a-z or whitespace, split on whitespace, drop
    English stop words and stem the remaining tokens.

    Args:
        text: The raw sentence. Non-string values (for example an empty or
            numeric spreadsheet cell) are treated as empty text.

    Returns:
        The cleaned tokens joined by single spaces, or '' when nothing is left.
    """
    if not isinstance(text, str):
        return ''
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)  # remove URLs
    # '@\w+' removes the whole handle. The earlier pattern '\@w+' only matched
    # '@' followed by literal 'w' characters, so handles leaked into the tokens.
    text = re.sub(r'@\w+|#', '', text)  # remove mentions and hashtag signs
    # Only a-z and whitespace survive this step, which already covers all
    # punctuation, so no separate punctuation pass is needed afterwards.
    text = re.sub(r'[^a-z\s]', '', text)
    tokens = text.split()
    tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)


# Apply preprocessing: store the cleaned form of every sentence in a new column
df['preprocessed_text'] = df['Sentence'].apply(clean_text)

# Create new Google Sheet and upload the whole DataFrame to its first worksheet.
# NOTE(review): gc.create is called unconditionally, so each run of this cell
# presumably produces another spreadsheet with the same title — confirm that
# is intended. `worksheet` is rebound here and no longer refers to the source sheet.
new_sheet_name = 'Preprocessed Sentiment Data'
sh = gc.create(new_sheet_name)
worksheet = sh.sheet1
set_with_dataframe(worksheet, df)

# Share the sheet with your email (writer access for a single user).
# NOTE(review): the address is hard-coded in the notebook; consider reading it
# from configuration before sharing this notebook publicly.
your_email = 'najiakhan.se@gmail.com'
sh.share(your_email, perm_type='user', role='writer')

# Output the link to the spreadsheet
print("✅ Sheet created and shared!")
print(f"🔗 Open your sheet: https://docs.google.com/spreadsheets/d/{sh.id}")



[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


✅ Sheet created and shared!
🔗 Open your sheet: https://docs.google.com/spreadsheets/d/1F9pNgU0AiEoBjheLDdZkplpSPXseh3y-du1S0sEE4ao


# Extract Word-Level Sentiment Data from Preprocessed Text

In [None]:
# STEP 1: Load the sheet into df
spreadsheet_name = ' sentiment_spreadsheet'  # Updated name to 'sentiment_spreadsheet'
spreadsheet = gc.open(spreadsheet_name)
worksheet = spreadsheet.sheet1
df = pd.DataFrame(worksheet.get_all_records())

# STEP 2: Preprocess the 'Sentence' column
import re
import string
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import nltk
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def clean_text(text):
    """Turn a raw sentence into a space-joined string of stemmed tokens.

    Steps, in order: lowercase, remove URLs, remove @mentions (the whole
    handle) and '#' signs (the hashtag word itself is kept), remove every
    character that is not a-z or whitespace, split on whitespace, drop
    English stop words and stem the remaining tokens.

    Args:
        text: The raw sentence. Non-string values (for example an empty or
            numeric spreadsheet cell) are treated as empty text.

    Returns:
        The cleaned tokens joined by single spaces, or '' when nothing is left.
    """
    if not isinstance(text, str):
        return ''
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)  # remove URLs
    # '@\w+' removes the whole handle. The earlier pattern '\@w+' only matched
    # '@' followed by literal 'w' characters, so handles leaked into the tokens.
    text = re.sub(r'@\w+|#', '', text)  # remove mentions and hashtag signs
    # Only a-z and whitespace survive this step, which already covers all
    # punctuation, so no separate punctuation pass is needed afterwards.
    text = re.sub(r'[^a-z\s]', '', text)
    tokens = text.split()
    tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)


df['preprocessed_text'] = df['Sentence'].apply(clean_text)

# STEP 3: Emit one {'Word', 'Sentiment'} record per token of every cleaned
# sentence, in row order; sentences with no tokens contribute no records.
word_rows = [
    {'Word': token, 'Sentiment': label}
    for label, cleaned in zip(df['Sentiment'], df['preprocessed_text'])
    for token in cleaned.split()
]

# STEP 4: Upload word-level data to a new Google Sheet
word_df = pd.DataFrame(word_rows)

word_sh = gc.create('Word-Level Sentiment from Preprocessed')
word_ws = word_sh.sheet1
set_with_dataframe(word_ws, word_df)

# STEP 5: Share the sheet
your_email = 'najiakhan.se@gmail.com'
word_sh.share(your_email, perm_type='user', role='writer')

# STEP 6: Output link
print("✅ Word-level sentiment sheet created and shared!")
print(f"🔗 View it: https://docs.google.com/spreadsheets/d/{word_sh.id}")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


✅ Word-level sentiment sheet created and shared!
🔗 View it: https://docs.google.com/spreadsheets/d/1Xl0Hl2jiTIMRvBP-vDIAfOs5f7H92Bn14lngjdwyTuk


# Apply Preprocessing

In [None]:
# 🧼 Apply cleaning to text column
df['cleaned_text'] = df['Sentence'].apply(clean_text)
print(df[['Sentence', 'cleaned_text']].head())

                                          Sentence  \
0  The team did an outstanding job on the project.   
1               I’m so proud of how far I’ve come.   
2        This is the best day I’ve had in a while.   
3            Everything went perfectly as planned.   
4              She has a fantastic sense of humor.   

                  cleaned_text  
0    team outstand job project  
1        im proud far ive come  
2                 best day ive  
3  everyth went perfectli plan  
4           fantast sens humor  


# Train-Test Split & TF-IDF Vectorization

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Candidate TF-IDF configuration.
# NOTE: this object is never fitted — `vectorizer` is rebound to a different
# TfidfVectorizer in the next cell before any fit/transform call.
vectorizer = TfidfVectorizer(
    max_df=0.9,   # ignore words that appear in more than 90% of docs
    min_df=5,     # ignore rare words (fewer than 5 documents)
    max_features=5000,  # cap on vocabulary size
    ngram_range=(1,2)  # use unigrams and bigrams
)


In [None]:
# Features & Target: the cleaned sentence text and its sentiment label
X = df['cleaned_text']
y = df['Sentiment']

# Split the data: 80% train / 20% test, with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TF-IDF Vectorization: unigrams + bigrams, at most 5000 features.
# The vectorizer is fitted on the training text only; the test text is
# transformed with that fitted vocabulary.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


# Model Training

In [None]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(class_weight='balanced', max_iter=1000)


In [None]:
# Train Logistic Regression Model
model = LogisticRegression(class_weight='balanced', max_iter=1000)
model.fit(X_train_tfidf, y_train)


# Text Normalization for Cleaning Unicode and Special Characters

In [None]:
import unicodedata

def normalize_text(text):
    """Return an ASCII-only version of a string; pass non-strings through.

    Curly single/double quotes are first replaced by their straight ASCII
    forms, the text is then NFKD-decomposed, and finally every character that
    still has no ASCII encoding is dropped.

    Args:
        text: Value to normalise. Anything that is not a ``str`` is returned
            unchanged.

    Returns:
        The normalised string, or the original object for non-string input.
    """
    # Guard clause: leave non-text cells untouched.
    if not isinstance(text, str):
        return text

    # Straighten typographic quotes before the ASCII filter would drop them.
    quote_map = {"’": "'", "‘": "'", "“": '"', "”": '"'}
    for curly, straight in quote_map.items():
        text = text.replace(curly, straight)

    decomposed = unicodedata.normalize('NFKD', text)
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    return ascii_bytes.decode('utf-8')

df['Sentence'] = df['Sentence'].apply(normalize_text)


# Train the Pipeline (TF-IDF + Logistic Regression)

In [None]:
from sklearn.pipeline import Pipeline

# Chain the existing `vectorizer` and `model` objects into one estimator so
# that cleaned text can be passed directly to fit/predict.
pipeline = Pipeline([
    ('tfidf', vectorizer),  # text -> TF-IDF features
    ('clf', model)          # TF-IDF features -> sentiment label
])

# Fit both steps on the training text and labels.
# NOTE(review): the pipeline holds these exact objects; rebinding the names
# `vectorizer` / `model` in later cells presumably does not change what
# `pipeline` predicts with — confirm when comparing later experiments.
pipeline.fit(X_train, y_train)


In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

# 1. Predict on the training data
y_train_pred = model.predict(X_train_tfidf)

# 2. Create a DataFrame with the text, the true label and the prediction.
#    The predictions were previously computed but never written to the file,
#    even though the CSV is named "train_predictions".
results = pd.DataFrame({
    'Text': X_train,            # cleaned training text (X comes from 'cleaned_text')
    'Actual': y_train,          # true labels
    'Predicted': y_train_pred,  # model output, same row order as X_train
})

# 3. Save it to CSV
csv_filename = 'train_predictions_with_text.csv'
results.to_csv(csv_filename, index=False)

print(f"File '{csv_filename}' created successfully!")

# 4. Download it if using Colab.
#    Only a missing google.colab module is treated as "not running in Colab";
#    a real failure during the download is no longer silently swallowed.
try:
    from google.colab import files
except ImportError:
    print("Download step is only for Colab or Jupyter. File is saved locally.")
else:
    files.download(csv_filename)


File 'train_predictions_with_text.csv' created successfully!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Accuracy improvement

In [None]:
from sklearn.metrics import accuracy_score

# y_test: actual labels
# y_pred: predicted labels from your model

y_pred = pipeline.predict(X_test)  # or use model.predict if you're not using pipeline
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy of the model:", round(accuracy * 100, 2), "%")


Accuracy of the model: 58.33 %


In [None]:
from sklearn.metrics import classification_report

y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

    Negative       0.50      0.30      0.38        10
     Neutral       0.50      0.83      0.62         6
    Positive       0.50      0.50      0.50         8

    accuracy                           0.50        24
   macro avg       0.50      0.54      0.50        24
weighted avg       0.50      0.50      0.48        24



In [None]:
vectorizer = TfidfVectorizer(ngram_range=(1, 3), max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [None]:
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=10000)  # Increase max features


In [None]:
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000, stop_words='english')


In [None]:
model = LogisticRegression(class_weight='balanced', max_iter=1000, C=1.0)
model.fit(X_train_tfidf, y_train)


In [None]:
from sklearn.model_selection import cross_val_score
cross_val_score(model, X_train_tfidf, y_train, cv=5, scoring='accuracy')


array([0.3       , 0.42105263, 0.52631579, 0.52631579, 0.57894737])

In [None]:
# Step 1: Fit the TF-IDF Vectorizer (unigrams + bigrams, English stop words
# removed) on the training text and transform the test text with it
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 2: Fit the Logistic Regression Model
model = LogisticRegression(class_weight='balanced', max_iter=1000)
model.fit(X_train_tfidf, y_train)  # Fit model on the training data

# Step 3: After fitting the model and vectorizer, you can access the coefficients and feature names.
# NOTE(review): coef_[0] is a single row of the coefficient matrix —
# presumably the weights for model.classes_[0] only, so this ranking describes
# one class rather than overall importance. Confirm against model.classes_.
importance = model.coef_[0]  # Coefficients of the trained model (first row only)
feature_names = vectorizer.get_feature_names_out()  # Get feature names
# Pair each weight with its feature and sort by weight, largest first
feature_importance = sorted(zip(importance, feature_names), reverse=True)

# Print the 10 features with the largest weights in that row
print(feature_importance[:10])


[(np.float64(0.5889352533949578), 'didnt'), (np.float64(0.4694986445355539), 'websit'), (np.float64(0.4694986445355539), 'complet'), (np.float64(0.41052671513260747), 'finish'), (np.float64(0.35856557444769843), 'bore'), (np.float64(0.3005299009133554), 'utter failur'), (np.float64(0.3005299009133554), 'utter'), (np.float64(0.3005299009133554), 'tone disrespect'), (np.float64(0.3005299009133554), 'tone'), (np.float64(0.3005299009133554), 'qualiti averag')]


In [None]:
importance = model.coef_[0]
feature_names = vectorizer.get_feature_names_out()
feature_importance = sorted(zip(importance, feature_names), reverse=True)
print(feature_importance[:10])  # Print top 10 important features


[(np.float64(0.5889352533949578), 'didnt'), (np.float64(0.4694986445355539), 'websit'), (np.float64(0.4694986445355539), 'complet'), (np.float64(0.41052671513260747), 'finish'), (np.float64(0.35856557444769843), 'bore'), (np.float64(0.3005299009133554), 'utter failur'), (np.float64(0.3005299009133554), 'utter'), (np.float64(0.3005299009133554), 'tone disrespect'), (np.float64(0.3005299009133554), 'tone'), (np.float64(0.3005299009133554), 'qualiti averag')]


In [None]:
from sklearn.metrics import accuracy_score

# y_test: actual labels
# y_pred: predicted labels from the retrained model
#
# `pipeline` still wraps the vectorizer/model objects it was built with, so
# pipeline.predict would only re-score the earlier baseline and hide the
# effect of the retraining done in this section. Score the current `model` on
# the matching `X_test_tfidf` features instead.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy of the model:", round(accuracy * 100, 2), "%")

Accuracy of the model: 58.33 %


In [None]:
from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)


Confusion Matrix:
[[5 2 3]
 [1 5 0]
 [2 2 4]]
Classification Report:
              precision    recall  f1-score   support

    Negative       0.62      0.50      0.56        10
     Neutral       0.56      0.83      0.67         6
    Positive       0.57      0.50      0.53         8

    accuracy                           0.58        24
   macro avg       0.58      0.61      0.59        24
weighted avg       0.59      0.58      0.58        24



In [None]:
# Remove rows where the 'Sentiment' is an empty string.
# NOTE(review): X, y and the train/test splits used above were built from df
# before this filter, so they are not affected by it; rebuild them if the
# filter is meant to change the training data.
df = df[df['Sentiment'] != '']


# Export Test-Set Predictions

In [None]:
import pandas as pd

# Create a DataFrame with the text, actual labels, and predicted labels
output_df = pd.DataFrame({
    'Text': X_test,                # cleaned test text (X comes from 'cleaned_text')
    'Actual Sentiment': y_test,    # Actual labels
    'Predicted Sentiment': y_pred  # Predicted labels, same row order as X_test
})

# Save it to a CSV
output_df.to_csv('model_output.csv', index=False)

# Optionally, download the CSV if you're using Colab.
# Only a missing google.colab module is treated as "not in Colab"; a real
# failure during the download is no longer silently swallowed.
try:
    from google.colab import files
except ImportError:
    print("File saved as 'model_output.csv'.")
else:
    files.download('model_output.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Token-Level and Sentence-Level Predictions on the Test Set

In [None]:
# ✅ Step 1: Train-Test Split
# Split the DataFrame itself so the features, labels and raw sentences of each
# row stay aligned by construction. (Recovering the test rows with a second
# split on df.index silently misaligns whenever df and X differ in length.)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
df_test = df_test.reset_index(drop=True)
y_train = df_train['Sentiment']
y_test = df_test['Sentiment']

# ✅ Step 2: TF-IDF Vectorization
# Fit on the training text only. Fitting on every row leaked the test
# vocabulary into the vectorizer, so words seen only in test sentences could
# never reach the "unknown word" branch below.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(df_train['cleaned_text'])
X_test = vectorizer.transform(df_test['cleaned_text'])

# ✅ Step 3: Train the Model
model = LogisticRegression(class_weight='balanced', max_iter=1000)
model.fit(X_train, y_train)

# ✅ Step 4: Name the per-token confidence column after the real class order.
# predict_proba returns one value per entry of model.classes_, in that order,
# so the header is derived from it instead of a hard-coded (and wrong) order.
class_labels = model.classes_
token_confidence_column = (
    "confidence (" + ", ".join(str(label).lower() for label in class_labels) + ")"
)

# ✅ Step 5: Predict only on test data
rows = []

for i, row in df_test.iterrows():
    sentence = row['Sentence']
    sentiment = row['Sentiment']
    text = row['cleaned_text']
    words = text.split()

    token_list = []
    confidence_list = []

    for word in words:
        word_tfidf = vectorizer.transform([word])
        if word_tfidf.nnz == 0:
            # Word is not in the training vocabulary: report no confidence
            probs = [0] * len(class_labels)
        else:
            probs = model.predict_proba(word_tfidf)[0]
            probs = [int(p * 100) for p in probs]  # whole percentages (truncated)

        token_list.append(word)
        confidence_list.append(str(probs))

    # Sentence-level prediction
    sentence_tfidf = vectorizer.transform([text])
    sentence_prediction = model.predict(sentence_tfidf)[0]
    sentence_confidence = model.predict_proba(sentence_tfidf)[0]
    sentence_confidence = [int(p * 100) for p in sentence_confidence]

    rows.append({
        'sentence': sentence,
        'sentiment': sentiment,
        'prediction': sentence_prediction,
        'token': token_list,
        token_confidence_column: confidence_list,
        'confidence (sentence)': str(sentence_confidence)
    })

# ✅ Step 6: Save and Download
results_df = pd.DataFrame(rows)
results_df.to_csv("predictions_on_testing_data.csv", index=False)
files.download("predictions_on_testing_data.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:

import pandas as pd
# Load the exported predictions under their own name. Rebinding `df` here
# would replace the sentiment dataset (with its 'cleaned_text' / 'Sentiment'
# columns) that the final training cell still reads.
predictions_df = pd.read_csv("predictions_on_testing_data.csv")
print("Correct predictions:")
for index, row in predictions_df.iterrows():
    # A row is correct when the predicted label equals the labelled sentiment
    if row["sentiment"] == row["prediction"]:
        print(f" {row['sentence']}  {row['prediction']}")


Correct predictions:
 The app keeps crashing every few minutes.  Negative
 I wasted hours trying to fix it.  Negative
 I received excellent customer service.  Positive
 The product broke after one day of use.  Negative
 They moved to a new apartment.  Neutral
 That joke made my day.  Positive
 This app really improved my productivity.  Positive
 He opened the window.  Neutral
 I am reading a novel.  Neutral
 I posted a photo online.  Neutral
 The team did an outstanding job on the project.  Positive
 The road is under construction.  Neutral
 I didn't enjoy the experience at all.  Negative
 I was treated unfairly.  Negative


In [None]:
import pandas as pd
# Load the exported predictions under their own name. Rebinding `df` here
# would replace the sentiment dataset (with its 'cleaned_text' / 'Sentiment'
# columns) that the final training cell still reads.
predictions_df = pd.read_csv("predictions_on_testing_data.csv")
print("Wrong predictions:")
for index, row in predictions_df.iterrows():
    # A row is wrong when the predicted label differs from the labelled sentiment
    if row["sentiment"] != row["prediction"]:
        print(f" {row['sentence']}  {row['prediction']} (Expected: {row['sentiment']})")


Wrong predictions:
 He always finds something to complain about.  Positive (Expected: Negative)
 She has a fantastic sense of humor.  Neutral (Expected: Positive)
 It was the worst hotel I've ever stayed in.  Positive (Expected: Negative)
 The staff went above and beyond.  Negative (Expected: Positive)
 The delivery was delayed by a week.  Neutral (Expected: Negative)
 The concert was a disaster.  Neutral (Expected: Negative)
 She helped me when no one else did.  Negative (Expected: Positive)
 The train arrived on time.  Negative (Expected: Neutral)
 The customer support was rude and unhelpful.  Positive (Expected: Negative)
 This room is so cozy and relaxing.  Neutral (Expected: Positive)


In [None]:
import pandas as pd
import ast  # used to turn the stored list strings back into lists

# Load the CSV under its own name. Rebinding `df` here would replace the
# sentiment dataset (with its 'cleaned_text' / 'Sentiment' columns) that the
# final training cell still reads.
predictions_df = pd.read_csv("predictions_on_testing_data.csv")

# Parse each row's confidence list (string → list)
predictions_df["confidence (sentence)"] = predictions_df["confidence (sentence)"].apply(ast.literal_eval)

# Compute the average of each sentence's confidence values.
# NOTE: these are the per-class percentages of one prediction, which add up to
# roughly 100, so with three classes this mean is always about 33; max(x)
# would be the confidence of the predicted class.
predictions_df["average_confidence"] = predictions_df["confidence (sentence)"].apply(lambda x: sum(x)/len(x))

# Show each sentence together with its confidence list and average
for index, row in predictions_df.iterrows():
    print(f"📝 {row['sentence']}\n🔍 Confidence: {row['confidence (sentence)']} → 📊 Average: {row['average_confidence']:.2f}\n")


📝 He always finds something to complain about.
🔍 Confidence: [30, 32, 37] → 📊 Average: 33.00

📝 The app keeps crashing every few minutes.
🔍 Confidence: [36, 32, 31] → 📊 Average: 33.00

📝 She has a fantastic sense of humor.
🔍 Confidence: [32, 34, 32] → 📊 Average: 32.67

📝 It was the worst hotel I've ever stayed in.
🔍 Confidence: [32, 30, 37] → 📊 Average: 33.00

📝 The staff went above and beyond.
🔍 Confidence: [38, 31, 29] → 📊 Average: 32.67

📝 The delivery was delayed by a week.
🔍 Confidence: [32, 34, 32] → 📊 Average: 32.67

📝 I wasted hours trying to fix it.
🔍 Confidence: [36, 32, 30] → 📊 Average: 32.67

📝 I received excellent customer service.
🔍 Confidence: [32, 29, 37] → 📊 Average: 32.67

📝 The product broke after one day of use.
🔍 Confidence: [40, 28, 31] → 📊 Average: 33.00

📝 They moved to a new apartment.
🔍 Confidence: [34, 36, 29] → 📊 Average: 33.00

📝 That joke made my day.
🔍 Confidence: [34, 30, 35] → 📊 Average: 33.00

📝 The concert was a disaster.
🔍 Confidence: [32, 34, 32] → 

In [None]:
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Assume you have a DataFrame 'df' with columns 'cleaned_text' and 'Sentiment'
# For example:
# df = pd.DataFrame({
#     'cleaned_text': ['text1', 'text2', ...],
#     'Sentiment': ['positive', 'negative', ...]
# })

# Final training run whose artefacts are saved to disk.
# Requires `df` to be the sentiment dataset with 'cleaned_text' and
# 'Sentiment' columns.
X = df['cleaned_text']  # Feature: cleaned text
y = df['Sentiment']  # Target: Sentiment labels

# Split the data (80/20, fixed seed). Only the training part is used below;
# X_test / y_test are not evaluated in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the vectorizer and model
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)

model = LogisticRegression(class_weight='balanced', max_iter=1000)
model.fit(X_train_tfidf, y_train)

# Save the model and vectorizer as two separate joblib files.
# The vectorizer was fitted on cleaned text, so text passed to it at
# inference time presumably needs the same cleaning first — confirm in the
# consuming application.
joblib.dump(model, 'sentiment_model.joblib')
joblib.dump(vectorizer, 'vectorizer.joblib')

print("Model and Vectorizer saved successfully.")


Model and Vectorizer saved successfully.


In [None]:
from google.colab import files

# Download the sentiment model file
files.download('sentiment_model.joblib')

# Download the vectorizer file
files.download('vectorizer.joblib')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>