In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Load the raw dataset. The file is read without a header row (header=None),
# so the columns are numbered until they are named in a later cell.
df = pd.read_csv('Movie.csv', encoding='ISO-8859-1', header=None)
# Show full cell contents so long text values are not cut off in the preview.
pd.set_option('display.max_colwidth', None)
# Preview only the first rows instead of rendering the whole frame.
df.head()


In [None]:

# Name the six columns of the frame, in positional order.
# NOTE(review): 'Id' is later used as the classification target in the
# train/test split -- confirm the name reflects what the column holds.
column_names = ['Id', 'User_id', 'date', 'type', 'user_name', 'text']
df.columns = column_names




In [None]:

# Drop the columns that are not used for modelling.
# DataFrame.drop returns a new frame rather than modifying df, so the result
# must be assigned back; errors='ignore' keeps the cell safe to re-run after
# the columns have already been removed.
df = df.drop(columns=['User_id', 'date', 'type', 'user_name'], errors='ignore')
df.head()



In [None]:

# Built once up front: calling stopwords.words('english') for every token
# rebuilt the whole word list each time and scanned it linearly. A frozenset
# gives constant-time membership tests.
STOP_WORDS = frozenset(stopwords.words('english'))


def preprocess(text):
    """Normalise a raw text value for bag-of-words feature extraction.

    Steps, in order: strip URLs, replace every non-alphanumeric character
    with a space, lowercase, tokenize with NLTK, drop English stop words and
    re-join the remaining tokens with single spaces.

    Requires the NLTK tokenizer models and the stopwords corpus to be
    available locally (see nltk.download).

    Parameters
    ----------
    text : str
        Raw text. Missing values (None/NaN) are treated as an empty string;
        any other non-string value is converted with str().

    Returns
    -------
    str
        The cleaned tokens joined by single spaces.
    """
    if not isinstance(text, str):
        # Empty CSV fields are read as NaN, which re.sub cannot handle.
        text = '' if pd.isna(text) else str(text)
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub('[^a-zA-Z0-9]', ' ', text)  # Remove special characters
    text = text.lower()  # Convert to lowercase
    tokens = nltk.word_tokenize(text)  # Tokenize
    # Remove stop words and convert the token list back to a string.
    return ' '.join(token for token in tokens if token not in STOP_WORDS)


df['text'] = df['text'].apply(preprocess)

In [None]:
# Hold out 20% of the rows for testing; the seed is fixed so the split is
# repeatable between runs.
texts = df['text']
targets = df['Id']
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    targets,
    test_size=0.2,
    random_state=42,
)




In [23]:
# Turn the training text into features in two stages: raw token counts
# first, then TF-IDF re-weighting of those counts. Both objects are fitted on
# the training split only and reused later for the test data.
cv = CountVectorizer()
tfidf_transformer = TfidfTransformer()
X_train_counts = cv.fit_transform(X_train)
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)


In [None]:
# Fit a logistic-regression classifier on the TF-IDF training features.
# The settings are collected in one place so they are easy to tune.
lgr_params = {'solver': 'lbfgs', 'max_iter': 2500, 'random_state': 1234}
lgr = LogisticRegression(**lgr_params)
lgr.fit(X_train_tfidf, y_train)

In [None]:
# Score the held-out split. The vectorizer and TF-IDF transformer fitted on
# the training data are reused here (transform only, no re-fitting).
X_test_counts = cv.transform(X_test)
X_test_tfidf = tfidf_transformer.transform(X_test_counts)
y_pred = lgr.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy:', accuracy)

In [24]:
# Predict the sentiment of a single tweet.
tweet = " no, it's not behaving at all. i'm mad. why am i here? because I can't see you all over there"
# Apply the same cleaning that was applied to the training text so the
# features match what the vectorizer and model were fitted on.
tweet_clean = preprocess(tweet)
tweet_counts = cv.transform([tweet_clean])
tweet_tfidf = tfidf_transformer.transform(tweet_counts)
sentiment = lgr.predict(tweet_tfidf)[0]
# Label 0 is reported as negative; any other label is reported as positive.
if sentiment == 0:
    print('Negative')
else:
    print('Positive')
print(sentiment)

Negative
0
