<a href="https://colab.research.google.com/github/mazen200/sentiment_analysis-/blob/main/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing libraries**

In [115]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report,f1_score,recall_score,precision_score
import nltk
from nltk.corpus import stopwords
import string

# **Load the dataset**

In [116]:
data = pd.read_csv("/content/sentimentdataset.csv")

In [117]:
data.head()

Unnamed: 0,ID,Text,Sentiment (Label),Timestamp,User,Source,Topic,Retweets,Likes,Country,Year,Month,Day,Hour
0,0,Enjoying a beautiful day at the park! ...,Positive,1/15/2023 12:30,User123,Twitter,#Nature #Park,15,30,USA,2023,1,15,12
1,1,Traffic was terrible this morning. ...,Negative,1/15/2023 8:45,CommuterX,Twitter,#Traffic #Morning,5,10,Canada,2023,1,15,8
2,2,Just finished an amazing workout! 💪 ...,Positive,1/15/2023 15:45,FitnessFan,Instagram,#Fitness #Workout,20,40,USA,2023,1,15,15
3,3,Excited about the upcoming weekend getaway! ...,Positive,1/15/2023 18:20,AdventureX,Facebook,#Travel #Adventure,8,15,UK,2023,1,15,18
4,4,Trying out a new recipe for dinner tonight. ...,Neutral,1/15/2023 19:55,ChefCook,Instagram,#Cooking #Food,12,25,Australia,2023,1,15,19


# **Data Preprocessing**

In [118]:
from sklearn.preprocessing import LabelEncoder #encoding
def Feature_Encoder(X,cols):
    for c in cols:
        lbl = LabelEncoder()
        lbl.fit(list(X[c].values))
        X[c] = lbl.transform(list(X[c].values))
    return X

In [119]:
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))  # Set of English stopwords
translator = str.maketrans('', '', string.punctuation)  # Translator to remove punctuation
data['Text'] = data['Text'].apply(lambda x: ' '.join([word.translate(translator) for word in x.lower().split() if word not in stop_words]))  # Remove stopwords and punctuation, and convert to lowercase
data.drop(columns=['User','Timestamp','Source','Retweets',	'Likes','ID',	'Country','Year','Month',	'Day','Hour'], inplace=True)
data = data.assign(Topic=data['Topic'].str.split('#')).explode('Topic')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [120]:
cols=('Topic','Sentiment (Label)')
data = Feature_Encoder(data.iloc[:,:3],cols)
data =data[data['Topic']!=0]
data.reset_index(drop=True,inplace=True)

In [121]:
data.head(10)

Unnamed: 0,Text,Sentiment (Label),Topic
0,enjoying beautiful day park,214,636
1,enjoying beautiful day park,214,690
2,traffic terrible morning,195,977
3,traffic terrible morning,195,618
4,finished amazing workout 💪,214,325
5,finished amazing workout 💪,214,1033
6,excited upcoming weekend getaway,214,982
7,excited upcoming weekend getaway,214,18
8,trying new recipe dinner tonight,197,183
9,trying new recipe dinner tonight,197,336


# **Feature Engineering**

In [122]:
tfidf_vectorizer = CountVectorizer(max_features=1000)  # Using TF-IDF for feature extraction
X = tfidf_vectorizer.fit_transform(data['Text'])  # Transform text data into numerical features
y = data['Sentiment (Label)']  # Target labels

# **Splitting data into training and testing sets**

In [123]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#**Model Selection and Training**


In [124]:
svm_model = SVC(kernel='linear')  # Using Support Vector Machine (SVM)
svm_model.fit(X_train, y_train)

# **Model Evaluation**


In [125]:
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test,y_pred,average ='weighted')
recall = recall_score(y_test, y_pred,average ='weighted')
f1sc = f1_score(y_test, y_pred,average ='weighted')

print("Accuracy:", accuracy)
print("precision : " , precision)
print("recall : ", recall)
print("f1 score : ",f1_score )
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.8907849829351536
precision :  0.8790946594715346
recall :  0.8907849829351536
f1 score :  <function f1_score at 0x787d20110ca0>
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         4
           2       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           8       1.00      1.00      1.00         1
          11       1.00      1.00      1.00         1
          12       1.00      1.00      1.00         3
          15       0.00      0.00      0.00         0
          16       1.00      1.00      1.00         1
          17       1.00      1.00      1.00         1
          21       1.00      1.00      1.00         1
          22       1.00      1.00      1.00         3
          24       1.00      1.00      1.00         2
          26       1.00      1.00      1.00         1
          28       1.00   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
