In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC

In [3]:
# Read the labelled training data from train.csv, decoding it as Latin-1
data = pd.read_csv(filepath_or_buffer='train.csv', encoding='latin-1')

In [4]:
# Preprocess the labelled dataset
# Patterns are compiled once instead of being looked up for every document.
# The mention pattern includes '_' so that handles such as "@user_name" are
# removed completely rather than leaving "name" behind as a token.
_MENTION_RE = re.compile(r'@[A-Za-z0-9_]+')
_URL_RE = re.compile(r'https?:\/\/\S+')
_NON_ALNUM_RE = re.compile(r'[^A-Za-z0-9]+')

# NLTK resources shared by every preprocess() call; built on first use.
_STOP_WORDS = None
_STEMMER = None


def preprocess(text):
    """Normalise one raw text into a space-separated string of stemmed tokens.

    Steps, in order: strip @mentions, strip http(s) URLs, replace every run of
    non-alphanumeric characters with a single space, lowercase, tokenize with
    ``nltk.word_tokenize``, drop English stop words, and Porter-stem what is
    left.

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (for example ``None`` or the float NaN
        that pandas produces for an empty CSV field) is treated as an empty
        document.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces; ``''`` when nothing
        remains or when ``text`` is not a string.
    """
    global _STOP_WORDS, _STEMMER

    # re.sub raises TypeError on non-strings, so missing values are mapped to
    # an empty document instead of aborting the whole column transformation.
    if not isinstance(text, str):
        return ''

    # Load the stop-word list and create the stemmer once, not once per token.
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    if _STEMMER is None:
        _STEMMER = nltk.stem.PorterStemmer()

    text = _MENTION_RE.sub('', text)      # Remove mentions
    text = _URL_RE.sub('', text)          # Remove URLs
    text = _NON_ALNUM_RE.sub(' ', text)   # Remove special characters
    text = text.lower()                   # Convert to lowercase

    words = nltk.word_tokenize(text)      # Tokenize
    # Remove stop words and stem the remaining tokens in a single pass.
    stems = [_STEMMER.stem(word) for word in words if word not in _STOP_WORDS]
    return ' '.join(stems)

# Overwrite the 'text' column with its cleaned, stemmed form
data['text'] = data['text'].map(preprocess)

In [5]:
# Extract bag-of-words features from the preprocessed data.
# The count matrix is kept in the sparse form returned by fit_transform:
# SVC accepts sparse input directly, whereas .toarray() would allocate a dense
# (documents x vocabulary) array that is almost entirely zeros.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(data['text'])
y = data['sentiment']

In [6]:
# Fit a linear-kernel support vector classifier on the training features
svm = SVC(
    kernel='linear',
    decision_function_shape='ovr',
    random_state=0,
)
svm.fit(X, y)

SVC(kernel='linear', random_state=0)

In [7]:
# Example tweets whose sentiment will be predicted
tweets = [
    "I love this movie!",
    "This restaurant is terrible.",
    "The weather is beautiful today.",
    "I hate Mondays.",
    "My phone is not working properly.",
    "I am so happy to be here.",
    "This traffic is making me crazy.",
]

In [8]:
# Clean each tweet with the same pipeline used on the training text
preprocessed_tweets = [preprocess(raw_tweet) for raw_tweet in tweets]

In [9]:
# Turn the cleaned tweets into count vectors using the already-fitted vectorizer
tweet_counts = vectorizer.transform(preprocessed_tweets)
X_tweets = tweet_counts.toarray()

In [10]:
# Run the fitted classifier on the tweet feature matrix
y_tweets = svm.predict(X=X_tweets)

In [11]:
# Show a header line followed by the array of predicted labels
print('Predicted sentiments of the tweets:', y_tweets, sep='\n')

Predicted sentiments of the tweets:
['positive' 'negative' 'negative' 'negative' 'negative' 'positive'
 'negative']
