In [60]:
import sklearn
import pickle
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [86]:
# Load the tab-separated sentiment dataset. It is read with header=None,
# so the two column names are supplied explicitly.
sentiment_data = pd.read_csv(
    'datasets/sentimental_analysis_data.csv',
    sep='\t',
    header=None,
    names=['Label', 'Text'],
)

# Preview ten randomly chosen rows as the cell output.
sentiment_data.sample(10)

Unnamed: 0,Label,Text
6130,0,", she helped me bobbypin my insanely cool hat ..."
241,1,I wanted desperately to love'The Da Vinci Code...
4445,0,Da Vinci Code sucks.
4813,0,i heard da vinci code sucked soo much only 2.5...
2340,1,I am going to start reading the Harry Potter s...
12,1,then I turn on the light and the radio and enj...
3528,1,man i loved brokeback mountain!
1568,1,we're gonna like watch Mission Impossible or H...
5905,0,These Harry Potter movies really suck.
5921,0,I think I hate Harry Potter because it outshin...


In [87]:
# Separate the frame into model input (the 'Text' column) and target
# (the 'Label' column).
X, Y = sentiment_data['Text'], sentiment_data['Label']

In [88]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle deterministic, so the same train/test partition is produced on
# every kernel restart instead of a different one each run.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [89]:
# TF-IDF text featurizer limited to 15 features. This single instance is
# passed to every pipeline built in this notebook.
tfidf_vect = TfidfVectorizer(
    max_features=15,
)

In [90]:
# Logistic-regression classifier configured with the liblinear solver.
logistic_clf = LogisticRegression(
    solver='liblinear',
)

In [91]:
# Chain the TF-IDF featurizer with the logistic-regression classifier and
# fit the whole pipeline on the training split.
clf_pipeline = Pipeline(
    steps=[
        ('tfidf_vect', tfidf_vect),
        ('classifier', logistic_clf),
    ]
)
pipeline_model = clf_pipeline.fit(x_train, y_train)

In [92]:
# Predict labels for the held-out test texts with the fitted pipeline.
y_pred = pipeline_model.predict(x_test)

In [93]:
# Share of test labels the logistic-regression pipeline predicted
# correctly; the bare name on the last line displays it as the cell output.
Accuracy_score = accuracy_score(y_true=y_test, y_pred=y_pred)
Accuracy_score

0.880057803468208

In [94]:
# Persist the fitted logistic-regression pipeline. The context manager
# flushes and closes the file handle even if pickling raises, rather than
# leaving an open handle for the garbage collector to clean up.
with open('models/logistic_clf/model.pkl', 'wb') as model_file:
    pickle.dump(pipeline_model, model_file)

In [95]:
# Decision-tree classifier with tree depth capped at 10.
decision_tree_clf = DecisionTreeClassifier(
    max_depth=10,
)

In [96]:
# Rebuild the pipeline with the decision-tree classifier as the final step
# and fit it on the training split. This rebinds clf_pipeline and
# pipeline_model.
clf_pipeline = Pipeline(
    steps=[
        ('tfidf_vect', tfidf_vect),
        ('classifier', decision_tree_clf),
    ]
)
pipeline_model = clf_pipeline.fit(x_train, y_train)

In [97]:
# Predict labels for the held-out test texts with the decision-tree pipeline.
y_pred = pipeline_model.predict(x_test)

In [98]:
# Share of test labels the decision-tree pipeline predicted correctly;
# the bare name on the last line displays it as the cell output.
Accuracy_score = accuracy_score(y_true=y_test, y_pred=y_pred)
Accuracy_score

0.8807803468208093

In [99]:
# Import joblib directly: the vendored `sklearn.externals.joblib` alias is
# deprecated since scikit-learn 0.21 and removed in 0.23, so the old import
# breaks on any newer release.
import joblib

# Persist the fitted decision-tree pipeline in joblib format. The call's
# return value (the list of written file names) is the cell output.
joblib.dump(pipeline_model, 'models/decision_tree_clf/model.joblib')



['models/decision_tree_clf/model.joblib']

In [100]:
# Persist the fitted decision-tree pipeline as a pickle. The context
# manager flushes and closes the file handle even if pickling raises.
with open('models/decision_tree_clf/model.pkl', 'wb') as model_file:
    pickle.dump(pipeline_model, model_file)

In [55]:
# Linear support-vector classifier with regularization strength C=1.0 and
# the solver limited to 100 iterations.
linear_svc_clf = LinearSVC(
    C=1.0,
    max_iter=100,
)

In [56]:
# Rebuild the pipeline with the linear SVC as the final step and fit it on
# the training split. This rebinds clf_pipeline and pipeline_model.
clf_pipeline = Pipeline(
    steps=[
        ('tfidf_vect', tfidf_vect),
        ('classifier', linear_svc_clf),
    ]
)
pipeline_model = clf_pipeline.fit(x_train, y_train)

In [57]:
# Predict labels for the held-out test texts with the linear-SVC pipeline.
y_pred = pipeline_model.predict(x_test)

In [58]:
# Share of test labels the linear-SVC pipeline predicted correctly;
# the bare name on the last line displays it as the cell output.
Accuracy_score = accuracy_score(y_true=y_test, y_pred=y_pred)
Accuracy_score

0.8894508670520231

In [59]:
# Persist the fitted linear-SVC pipeline as a pickle. The context manager
# flushes and closes the file handle even if pickling raises.
with open('models/linear_svc_clf/model.pkl', 'wb') as model_file:
    pickle.dump(pipeline_model, model_file)

In [82]:
import pickle

In [83]:
# Reload the pickled decision-tree pipeline from disk. The context manager
# closes the file handle as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load model files that were produced by a trusted source.
with open('models/decision_tree_clf/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [84]:
# Run the reloaded model on the test texts to confirm it is usable after
# the round trip through pickle. Relies on x_test from the earlier split cell.
pred = model.predict(x_test)

In [85]:
# Display the predictions returned by the reloaded model.
pred

array([1, 1, 0, ..., 1, 0, 1])

In [71]:
# Show the installed scikit-learn version for this session.
sklearn.__version__

'0.22.2.post1'