In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from textblob import TextBlob
from nltk.corpus import stopwords
from collections import Counter
import warnings; warnings.simplefilter('ignore')
import nltk
import string
from nltk import ngrams
from nltk.tokenize import word_tokenize 
from nltk.stem import SnowballStemmer

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing


In [None]:

# Load the raw drugs.com review splits.
# Each frame now reads the file whose name matches it: the original read the
# *Test* CSV into df_train and the *Train* CSV into df_test, which mislabelled
# every "train"/"test" shape reported further down.
df_train = pd.read_csv("../input/drugsComTrain_raw.csv", encoding='latin1')
df_test = pd.read_csv("../input/drugsComTest_raw.csv", encoding='latin1')




In [None]:

# Show the dtype pandas inferred for each column of df_train
df_train.dtypes

In [None]:
# Preview the first rows of df_train
df_train.head()

In [None]:

# Report the (rows, columns) shape of each frame as loaded, before any rows are dropped
print ("The shape of the train set given is : ", df_train.shape)
print ("The shape of the test set given is : ", df_test.shape)

In [None]:
# Drop every row that has a null in any column; the notebook's premise is that
# such rows are well under 5% of the data set.
# dropna() with no arguments already means how='any', axis=0.
df_train = df_train.dropna()
df_test = df_test.dropna()

In [None]:
# Report the shapes again so the effect of dropping null rows is visible
print ("The shape of the train df :",df_train.shape)
print ("The shape of the test df :", df_test.shape)

In [None]:
# Stack the train and test frames row-wise into a single frame, df_data.
# The original row labels are kept here; the index is rebuilt in the sorting cell.
merge = [df_train, df_test]
df_data = pd.concat(merge)
df_data.head(10)

In [None]:
# (rows, columns) of the combined frame
df_data.shape

In [None]:

# Normalise every column name to lower case so later cells can use one spelling
df_data.columns = [column_name.lower() for column_name in df_data.columns]

In [None]:

# Order the rows by 'uniqueid' (ascending) and rebuild a clean 0..n-1 index,
# discarding the row labels inherited from the two concatenated frames
df_data = (
    df_data
    .sort_values(by='uniqueid', ascending=True)
    .reset_index(drop=True)
)

In [None]:
# Count the null entries left in the 'condition' column
df_data['condition'].isnull().sum(axis = 0)

In [None]:
# Column dtypes of the combined frame
df_data.dtypes

In [None]:

# Preview the first 10 rows after sorting and re-indexing
df_data.head(10)

In [None]:

# Parse the 'date' column into pandas datetimes; the .dt accessors used later
# to split out day/month/year depend on this conversion
df_data['date'] = pd.to_datetime(df_data['date'])

In [None]:
# Top 20 unigrams according to the rating

# .copy() gives an independent frame, so the cleaned text below is not written
# through a slice of df_data (this is what raises SettingWithCopyWarning).
df_ = df_data[['rating', 'review']].copy()
# "&#039;" is the HTML entity for an apostrophe; match it as a literal string.
df_['review'] = df_['review'].str.replace("&#039;", "", regex=False)
# regex=True must be explicit: since pandas 2.0 str.replace treats the pattern
# as a literal string by default, which turns this cleaning step into a no-op.
df_['review'] = df_['review'].str.replace(r'[^\w\d\s]', ' ', regex=True)

# One long document per rating bucket (<= 5 versus > 5)
df_review_5 = " ".join(df_.loc[df_.rating <= 5, 'review'])
df_review_10 = " ".join(df_.loc[df_.rating > 5, 'review'])

# The token lists are reused by the bigram and trigram cells
token_review_5 = word_tokenize(df_review_5)
token_review_10 = word_tokenize(df_review_10)

unigrams_5 = ngrams(token_review_5, 1)
unigrams_10 = ngrams(token_review_10, 1)

frequency_5 = Counter(unigrams_5)
frequency_10 = Counter(unigrams_10)

# Column 0 holds the n-gram tuple, column 1 its count
df_5 = pd.DataFrame(frequency_5.most_common(20))
df_10 = pd.DataFrame(frequency_10.most_common(20))

# Barplot that shows the top 20 unigrams.
# The seaborn theme is applied BEFORE the figure is created; applied afterwards
# (as originally written) it only affects figures drawn by later cells.
sns.set(font_scale = 1.5, style = 'whitegrid')
plt.rcParams['figure.figsize'] = [20,11]
fig, ax = plt.subplots(1,2)

# Join each n-gram tuple into plain text so the bars are labelled "word"
# rather than with the tuple repr "('word',)".
sns_5 = sns.barplot(x = df_5[1], y = df_5[0].map(" ".join), color = 'lightsteelblue', ax = ax[0])
sns_10 = sns.barplot(x = df_10[1], y = df_10[0].map(" ".join), color = 'lightsteelblue', ax = ax[1])

# Setting titles and axes labels (the defaults would be the column names 0 and 1)
sns_5.set_title("Top 20 unigrams according for rating <= 5")
sns_10.set_title("Top 20 unigrams according for rating > 5")
sns_5.set_xlabel("Frequency")
sns_10.set_xlabel("Frequency")
sns_10.set_ylabel("")
sns_5.set_ylabel("Unigrams");

In [None]:
# Top 20 bigrams according to the rating
# Reuses token_review_5 / token_review_10 produced by the unigram cell.
bigrams_5 = ngrams(token_review_5, 2)
bigrams_10 = ngrams(token_review_10, 2)

frequency_5 = Counter(bigrams_5)
frequency_10 = Counter(bigrams_10)

# Column 0 holds the n-gram tuple, column 1 its count
df_5 = pd.DataFrame(frequency_5.most_common(20))
df_10 = pd.DataFrame(frequency_10.most_common(20))

# Barplot that shows the top 20 bigrams.
# The seaborn theme is applied BEFORE the figure is created; applied afterwards
# (as originally written) it only affects figures drawn by later cells.
sns.set(font_scale = 1.3, style = 'whitegrid')
plt.rcParams['figure.figsize'] = [22,11]
fig, ax = plt.subplots(1,2)

# Join each n-gram tuple into plain text so the bars carry readable labels
sns_5 = sns.barplot(x = df_5[1], y = df_5[0].map(" ".join), color = 'red', ax = ax[0])
sns_10 = sns.barplot(x = df_10[1], y = df_10[0].map(" ".join), color = 'red', ax = ax[1])

# Setting titles and axes labels (the defaults would be the column names 0 and 1)
sns_5.set_title("Top 20 bigrams according for rating <= 5")
sns_10.set_title("Top 20 bigrams according for rating > 5")
sns_5.set_xlabel("Frequency")
sns_10.set_xlabel("Frequency")
sns_10.set_ylabel("")
sns_5.set_ylabel("bigrams");


In [None]:
# Top 20 trigrams according to the rating
# Reuses token_review_5 / token_review_10 produced by the unigram cell.
trigrams_5 = ngrams(token_review_5, 3)
trigrams_10 = ngrams(token_review_10, 3)

frequency_5 = Counter(trigrams_5)
frequency_10 = Counter(trigrams_10)

# Column 0 holds the n-gram tuple, column 1 its count
df_5 = pd.DataFrame(frequency_5.most_common(20))
df_10 = pd.DataFrame(frequency_10.most_common(20))

# Barplot that shows the top 20 trigrams.
# The seaborn theme is applied BEFORE the figure is created; applied afterwards
# (as originally written) it only affects figures drawn by later cells.
sns.set(font_scale = 1.3, style = 'whitegrid')
plt.rcParams['figure.figsize'] = [25,13]
fig, ax = plt.subplots(1,2)

# Join each n-gram tuple into plain text so the bars carry readable labels
sns_5 = sns.barplot(x = df_5[1], y = df_5[0].map(" ".join), color = 'orange', ax = ax[0])
sns_10 = sns.barplot(x = df_10[1], y = df_10[0].map(" ".join), color = 'orange', ax = ax[1])

# Setting titles and axes labels (the defaults would be the column names 0 and 1)
sns_5.set_title("Top 20 trigrams according for rating <= 5")
sns_10.set_title("Top 20 trigrams according for rating > 5")
sns_5.set_xlabel("Frequency")
sns_10.set_xlabel("Frequency")
sns_10.set_ylabel("")
sns_5.set_ylabel("trigrams");

In [None]:
# Binary sentiment label derived from the rating: 1 when rating > 5, otherwise 0
df_data['sentiment_rate'] = df_data['rating'].gt(5).astype('int64')

In [None]:

def review_clean(review):
    """Normalise a Series of review texts.

    Steps, in order: lower-case, drop the ``&#039;`` apostrophe entity, turn
    every character that is not a word character or whitespace into a space,
    turn runs of non-ASCII characters into a space, strip leading/trailing
    whitespace, collapse whitespace runs to one space, and replace runs of two
    or more dots with a space.

    Parameters
    ----------
    review : pandas.Series
        Series of strings; the ``.str`` accessor is used throughout.

    Returns
    -------
    pandas.Series
        The cleaned text, with the same index as ``review``.
    """
    # regex=True / regex=False is spelled out on every call. pandas 2.0 changed
    # the default of Series.str.replace to regex=False, under which all the
    # pattern-based steps below match nothing and the text comes back dirty.
    cleaned = review.str.lower()

    # "&#039;" is a fixed string (the HTML entity for an apostrophe)
    cleaned = cleaned.str.replace("&#039;", "", regex=False)

    # Removing all the special characters
    cleaned = cleaned.str.replace(r'[^\w\d\s]', ' ', regex=True)

    # Removing all the non-ASCII characters
    cleaned = cleaned.str.replace(r'[^\x00-\x7F]+', ' ', regex=True)

    # Removing the leading and trailing whitespace
    cleaned = cleaned.str.replace(r'^\s+|\s+?$', '', regex=True)

    # Replacing multiple whitespace characters with a single space
    cleaned = cleaned.str.replace(r'\s+', ' ', regex=True)

    # Replacing two or more dots with a space. Dots were already removed by the
    # special-character step, so this is kept only as a safety net.
    cleaned = cleaned.str.replace(r'\.{2,}', ' ', regex=True)

    return cleaned

In [None]:
# review_clean() operates on the whole Series through the .str accessor, so it is
# called once on the column rather than element-wise via .apply
df_data['review_clean'] = review_clean(df_data['review'])

In [None]:
# Drop English stop words from the cleaned reviews.
# stop_words is a set so each membership test is cheap.
stop_words = set(stopwords.words('english'))
df_data['review_clean'] = [
    ' '.join(token for token in text.split() if token not in stop_words)
    for text in df_data['review_clean']
]

In [None]:
# Reduce every word of the cleaned reviews to its stem with the English
# Snowball stemmer
Snow_ball = SnowballStemmer("english")
df_data['review_clean'] = [
    " ".join(map(Snow_ball.stem, text.split()))
    for text in df_data['review_clean']
]

In [None]:
# Spot-check the first 20 cleaned, stop-word-filtered and stemmed reviews
df_data['review_clean'][0: 20]

In [None]:
# Split the parsed 'date' column into separate day / month / year columns
for part in ('day', 'month', 'year'):
    df_data[part] = getattr(df_data['date'].dt, part)

In [None]:
# Preview the frame with the new day / month / year columns
df_data.head()

In [None]:
def sentiment(review):
    """Return the TextBlob sentiment polarity of every text in ``review``.

    ``review`` is any iterable of strings (the notebook passes DataFrame
    columns). The result is a plain list with one polarity value per input
    text, in iteration order.
    """
    return [TextBlob(text).sentiment.polarity for text in review]

In [None]:
# Preview of the frame before the sentiment columns are added
df_data.head()

In [None]:
# Polarity of the raw, uncleaned review text
df_data['sentiment'] = sentiment(df_data['review'])

In [None]:
# Polarity of the cleaned, stop-word-filtered and stemmed review text
df_data['sentiment_clean'] = sentiment(df_data['review_clean'])

In [None]:
# 2x2 correlation matrix between raw-text polarity and rating;
# the off-diagonal entries are the correlation coefficient of interest
np.corrcoef(df_data['sentiment'], df_data['rating'])

In [None]:

# Same correlation check, using the polarity of the stemmed / stop-word-filtered text
np.corrcoef(df_data['sentiment_clean'], df_data['rating'])

In [None]:
# Clean the raw reviews with review_clean() only: this cell applies neither the
# stop-word filter nor the Snowball stemmer. The result is then scored with sentiment().
df_data['review_clean_ss'] = review_clean(df_data['review'])
df_data['sentiment_clean_ss'] = sentiment(df_data['review_clean_ss'])

In [None]:

# Correlation between rating and the polarity of the review_clean()-only text
np.corrcoef(df_data['sentiment_clean_ss'], df_data['rating'])

In [None]:
# The 20 most frequent values of 'condition', with their row counts
df_data['condition'].value_counts().head(20)

In [None]:
# Preview the frame including the sentiment columns added above
df_data.head(10)

In [None]:
# Hand-crafted text-shape features.
# Length-style features are measured on the review_clean()-only text
# ('review_clean_ss'); punctuation, casing and stop-word features are measured
# on the raw 'review' text, where that information still exists.

def _words(text):
    """Whitespace-separated tokens of ``text`` (coerced to ``str`` first)."""
    return str(text).split()

# Word count in each review
df_data['count_word'] = df_data["review_clean_ss"].map(lambda text: len(_words(text)))

# Unique word count
df_data['count_unique_word'] = df_data["review_clean_ss"].map(lambda text: len(set(_words(text))))

# Letter count (every character of the cleaned text, spaces included)
df_data['count_letters'] = df_data["review_clean_ss"].map(lambda text: len(str(text)))

# Punctuation count
df_data["count_punctuations"] = df_data["review"].map(
    lambda text: sum(1 for char in str(text) if char in string.punctuation))

# Upper case words count
df_data["count_words_upper"] = df_data["review"].map(
    lambda text: sum(1 for word in _words(text) if word.isupper()))

# Title case words count
df_data["count_words_title"] = df_data["review"].map(
    lambda text: sum(1 for word in _words(text) if word.istitle()))

# Number of stopwords
df_data["count_stopwords"] = df_data["review"].map(
    lambda text: sum(1 for word in str(text).lower().split() if word in stop_words))

# Average length of the words (NaN when a review has no words at all)
df_data["mean_word_len"] = df_data["review_clean_ss"].map(
    lambda text: np.mean([len(word) for word in _words(text)]))

In [None]:
# List every column now present, including the engineered features
df_data.columns

In [None]:
# Correlation heatmap of the numeric columns, engineered features included.
# The theme is set first and the figure size afterwards, so the size is already
# in rcParams when heatmap() creates the figure.
sns.set(font_scale = 1.2)
plt.rcParams['figure.figsize'] = [17,15]
# include='number' keeps every numeric column. The original 'int64' filter
# silently dropped the float features (sentiment scores, mean_word_len) and any
# integer column stored at another width.
corr = df_data.select_dtypes(include = 'number').corr()
sns_ = sns.heatmap(corr, annot = True, cmap = 'YlGnBu')
plt.setp(sns_.get_xticklabels(), rotation = 45);

In [None]:
# Preview before the categorical columns are label-encoded
df_data.head(10)

In [None]:
# Replace the text categories in 'drugname' and 'condition' with integer codes.
# Each fitted encoder is kept in label_encoder_feat, keyed by column name, so
# the codes can be mapped back to the original labels later.
# (LabelEncoder is already imported in the notebook's first cell.)
label_encoder_feat = {}
for feature in ('drugname', 'condition'):
    encoder = LabelEncoder()
    df_data[feature] = encoder.fit_transform(df_data[feature])
    label_encoder_feat[feature] = encoder

In [None]:
# Preview after label encoding: 'drugname' and 'condition' now hold integer codes
df_data.head(10)

In [None]:
# Importing Libraries for the Machine Learning Model

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [None]:
# Select the model inputs and the binary target, then hold out 30% of the rows
# for evaluation. random_state makes the split repeatable.
FEATURE_COLUMNS = [
    'condition', 'usefulcount', 'sentiment_clean_ss',
    'count_word', 'count_unique_word', 'count_letters',
    'count_punctuations', 'count_words_upper', 'count_words_title',
    'count_stopwords', 'mean_word_len',
]

features = df_data[FEATURE_COLUMNS]
target = df_data['sentiment_rate']

X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=42,
)
print ("The Train set size ", X_train.shape)
print ("The Test set size ", X_test.shape)

In [None]:
# Turn infinite values into NaN so the imputation step can treat them as missing.
# Both splits are cleaned: the original only cleaned the training split, which
# would leave any inf in X_test to reach predict() untouched.
# Results are re-assigned instead of using inplace=True, because the splits are
# derived from slices of df_data.
X_train = X_train.replace([np.inf, -np.inf], np.nan)
y_train = y_train.replace([np.inf, -np.inf], np.nan)
X_test = X_test.replace([np.inf, -np.inf], np.nan)
y_test = y_test.replace([np.inf, -np.inf], np.nan)

In [None]:
# Fill missing feature values with per-column means.
# The means are computed on the TRAINING split only and reused for the test
# split, so both splits are imputed with the same values and no statistic of
# the held-out data feeds into preprocessing.
train_feature_means = X_train.mean()
X_train = X_train.fillna(train_feature_means)
X_test = X_test.fillna(train_feature_means)

# The targets are deliberately not imputed: sentiment_rate is a 0/1 class label
# derived from the rating for every row, and filling a gap with the column mean
# would create a fractional "class" that no classifier can use.


In [None]:
# Neural network (multi-layer perceptron) model

# random_state pins the random initialisation so that a fresh
# Restart & Run All reproduces the same scores.
MLP = MLPClassifier(random_state=42)
MLP = MLP.fit(X_train, y_train)
y_pred = MLP.predict(X_test)
score = accuracy_score(y_test, y_pred) * 100
print("Accuracy using Neural Network: ", round(score, 1), "%" )

# Confusion matrix. labels=[0, 1] forces the 2x2 layout [[TN, FP], [FN, TP]]
# even if one class is missing from y_test / y_pred; the annotation loop and the
# four-way unpacking below both rely on that shape.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
print(cm)

# Plot the confusion matrix and annotate every cell with its count
plt.clf()
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
classNames = ['Negative','Positive']
plt.title('Confusion Matrix')
plt.ylabel('Actual Class')
plt.xlabel('Predicted Class')
tick_marks = np.arange(len(classNames))
plt.xticks(tick_marks, classNames, rotation=45)
plt.yticks(tick_marks, classNames)
s = [['TN','FP'], ['FN', 'TP']]
for i in range(2):
    for j in range(2):
        plt.text(j,i, str(s[i][j])+" = "+str(cm[i][j]))
plt.show()

tn, fp, fn, tp = cm.ravel()
print("True Negatives: ",tn)
print("False Positives: ",fp)
print("False Negatives: ",fn)
print("True Positives: ",tp)


# Defined in this cell so the cell can be run on its own.
def fbeta(precision, recall, beta):
    """F-beta score from precision and recall given as fractions in [0, 1].

    beta > 1 weights recall more heavily, beta < 1 weights precision more
    heavily. Returns 0.0 when the denominator is zero (precision and recall
    both zero) instead of dividing by zero.
    """
    denominator = pow(beta, 2) * precision + recall
    if denominator == 0:
        return 0.0
    return ((1 + pow(beta, 2)) * precision * recall) / denominator


# All ratios are kept as fractions in [0, 1]; a zero denominator yields 0.0.
Accuracy = (tn+tp)*100/(tp+tn+fp+fn)
Precision = tp/(tp+fp) if (tp+fp) else 0.0
Recall = tp/(tp+fn) if (tp+fn) else 0.0
f1 = fbeta(Precision, Recall, 1)    # F1 is F-beta with beta = 1
f2 = fbeta(Precision, Recall, 2)
f0_5 = fbeta(Precision, Recall, 0.5)

print("Accuracy using Neural Network Classifier: ", round(Accuracy, 1), "%" )
# Scale the fractions by 100 when printing so that the trailing "%" is true;
# the original rounded the raw fraction and printed e.g. "0.8 %" for 80%.
for metric_name, metric_value in (("Precision", Precision), ("Recall", Recall),
                                  ("F1 score", f1), ("F2 score", f2),
                                  ("F0.5 score", f0_5)):
    print(metric_name + " using Neural Network Classifier: ", round(metric_value * 100, 1), "%" )


In [None]:
# Gaussian Naive Bayes model

Nb = GaussianNB()
Nb = Nb.fit(X_train, y_train)
y_pred = Nb.predict(X_test)
score = accuracy_score(y_test, y_pred) * 100
print("Accuracy using Naive Bayes: ", round(score, 1), "%" )

# Confusion matrix. labels=[0, 1] forces the 2x2 layout [[TN, FP], [FN, TP]]
# even if one class is missing from y_test / y_pred; the annotation loop and the
# four-way unpacking below both rely on that shape.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
print(cm)

# Plot the confusion matrix and annotate every cell with its count
plt.clf()
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
classNames = ['Negative','Positive']
plt.title('Confusion Matrix')
plt.ylabel('Actual Class')
plt.xlabel('Predicted Class')
tick_marks = np.arange(len(classNames))
plt.xticks(tick_marks, classNames, rotation=45)
plt.yticks(tick_marks, classNames)
s = [['TN','FP'], ['FN', 'TP']]
for i in range(2):
    for j in range(2):
        plt.text(j,i, str(s[i][j])+" = "+str(cm[i][j]))
plt.show()

tn, fp, fn, tp = cm.ravel()
print("True Negatives: ",tn)
print("False Positives: ",fp)
print("False Negatives: ",fn)
print("True Positives: ",tp)


# Defined in this cell so the cell can be run on its own.
def fbeta(precision, recall, beta):
    """F-beta score from precision and recall given as fractions in [0, 1].

    beta > 1 weights recall more heavily, beta < 1 weights precision more
    heavily. Returns 0.0 when the denominator is zero (precision and recall
    both zero) instead of dividing by zero.
    """
    denominator = pow(beta, 2) * precision + recall
    if denominator == 0:
        return 0.0
    return ((1 + pow(beta, 2)) * precision * recall) / denominator


# All ratios are kept as fractions in [0, 1]; a zero denominator yields 0.0.
Accuracy = (tn+tp)*100/(tp+tn+fp+fn)
Precision = tp/(tp+fp) if (tp+fp) else 0.0
Recall = tp/(tp+fn) if (tp+fn) else 0.0
f1 = fbeta(Precision, Recall, 1)    # F1 is F-beta with beta = 1
f2 = fbeta(Precision, Recall, 2)
f0_5 = fbeta(Precision, Recall, 0.5)

print("Accuracy using Naive Bayes Classifier: ", round(Accuracy, 1), "%" )
# Scale the fractions by 100 when printing so that the trailing "%" is true;
# the original rounded the raw fraction and printed e.g. "0.8 %" for 80%.
for metric_name, metric_value in (("Precision", Precision), ("Recall", Recall),
                                  ("F1 score", f1), ("F2 score", f2),
                                  ("F0.5 score", f0_5)):
    print(metric_name + " using Naive Bayes Classifier: ", round(metric_value * 100, 1), "%" )





In [None]:
# Decision tree model

# random_state fixes the tie-breaking between equally good splits so the tree,
# and therefore the reported scores, are reproducible.
dt = DecisionTreeClassifier(min_samples_split = 100, criterion='entropy', random_state=42)
dt = dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)
score = accuracy_score(y_test, y_pred) * 100
print("Accuracy using Desicion Tree: ", round(score, 1), "%" )

# Confusion matrix. labels=[0, 1] forces the 2x2 layout [[TN, FP], [FN, TP]]
# even if one class is missing from y_test / y_pred; the annotation loop and the
# four-way unpacking below both rely on that shape.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
print(cm)

# Plot the confusion matrix and annotate every cell with its count
plt.clf()
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
classNames = ['Negative','Positive']
plt.title('Confusion Matrix')
plt.ylabel('Actual Class')
plt.xlabel('Predicted Class')
tick_marks = np.arange(len(classNames))
plt.xticks(tick_marks, classNames, rotation=45)
plt.yticks(tick_marks, classNames)
s = [['TN','FP'], ['FN', 'TP']]
for i in range(2):
    for j in range(2):
        plt.text(j,i, str(s[i][j])+" = "+str(cm[i][j]))
plt.show()

tn, fp, fn, tp = cm.ravel()
print("True Negatives: ",tn)
print("False Positives: ",fp)
print("False Negatives: ",fn)
print("True Positives: ",tp)


# Defined in this cell so the cell can be run on its own.
def fbeta(precision, recall, beta):
    """F-beta score from precision and recall given as fractions in [0, 1].

    beta > 1 weights recall more heavily, beta < 1 weights precision more
    heavily. Returns 0.0 when the denominator is zero (precision and recall
    both zero) instead of dividing by zero.
    """
    denominator = pow(beta, 2) * precision + recall
    if denominator == 0:
        return 0.0
    return ((1 + pow(beta, 2)) * precision * recall) / denominator


# All ratios are kept as fractions in [0, 1]; a zero denominator yields 0.0.
Accuracy = (tn+tp)*100/(tp+tn+fp+fn)
Precision = tp/(tp+fp) if (tp+fp) else 0.0
Recall = tp/(tp+fn) if (tp+fn) else 0.0
f1 = fbeta(Precision, Recall, 1)    # F1 is F-beta with beta = 1
f2 = fbeta(Precision, Recall, 2)
f0_5 = fbeta(Precision, Recall, 0.5)

print("Accuracy using Desicion Tree Classifier: ", round(Accuracy, 1), "%" )
# Scale the fractions by 100 when printing so that the trailing "%" is true;
# the original rounded the raw fraction and printed e.g. "0.8 %" for 80%.
for metric_name, metric_value in (("Precision", Precision), ("Recall", Recall),
                                  ("F1 score", f1), ("F2 score", f2),
                                  ("F0.5 score", f0_5)):
    print(metric_name + " using Desicion Tree Classifier: ", round(metric_value * 100, 1), "%" )




In [None]:
# Random forest model

RF = RandomForestClassifier(n_estimators=100, max_depth=2,
                             random_state=0)
RF = RF.fit(X_train, y_train)
y_pred = RF.predict(X_test)

score = accuracy_score(y_test, y_pred) * 100
print("Accuracy using Random Forest Classifier: ", round(score, 1), "%" )

# Confusion matrix. labels=[0, 1] forces the 2x2 layout [[TN, FP], [FN, TP]]
# even if one class is missing from y_test / y_pred; the annotation loop and the
# four-way unpacking below both rely on that shape.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
print(cm)

# Plot the confusion matrix and annotate every cell with its count
plt.clf()
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
classNames = ['Negative','Positive']
plt.title('Confusion Matrix')
plt.ylabel('Actual Class')
plt.xlabel('Predicted Class')
tick_marks = np.arange(len(classNames))
plt.xticks(tick_marks, classNames, rotation=45)
plt.yticks(tick_marks, classNames)
s = [['TN','FP'], ['FN', 'TP']]
for i in range(2):
    for j in range(2):
        plt.text(j,i, str(s[i][j])+" = "+str(cm[i][j]))
plt.show()

tn, fp, fn, tp = cm.ravel()
print("True Negatives: ",tn)
print("False Positives: ",fp)
print("False Negatives: ",fn)
print("True Positives: ",tp)


# Defined in this cell so the cell can be run on its own.
def fbeta(precision, recall, beta):
    """F-beta score from precision and recall given as fractions in [0, 1].

    beta > 1 weights recall more heavily, beta < 1 weights precision more
    heavily. Returns 0.0 when the denominator is zero (precision and recall
    both zero) instead of dividing by zero.
    """
    denominator = pow(beta, 2) * precision + recall
    if denominator == 0:
        return 0.0
    return ((1 + pow(beta, 2)) * precision * recall) / denominator


# All ratios are kept as fractions in [0, 1]; a zero denominator yields 0.0.
Accuracy = (tn+tp)*100/(tp+tn+fp+fn)
Precision = tp/(tp+fp) if (tp+fp) else 0.0
Recall = tp/(tp+fn) if (tp+fn) else 0.0
f1 = fbeta(Precision, Recall, 1)    # F1 is F-beta with beta = 1
f2 = fbeta(Precision, Recall, 2)
f0_5 = fbeta(Precision, Recall, 0.5)

print("Accuracy using Random Forest Classifier: ", round(Accuracy, 1), "%" )
# Scale the fractions by 100 when printing so that the trailing "%" is true;
# the original rounded the raw fraction and printed e.g. "0.8 %" for 80%.
for metric_name, metric_value in (("Precision", Precision), ("Recall", Recall),
                                  ("F1 score", f1), ("F2 score", f2),
                                  ("F0.5 score", f0_5)):
    print(metric_name + " using Random Forest Classifier: ", round(metric_value * 100, 1), "%" )



In [None]:
# Logistic regression model

LR = LogisticRegression()
LR.fit(X_train,y_train)
y_pred = LR.predict(X_test)
score = accuracy_score(y_test, y_pred) * 100
print("Accuracy using Logistic Regression Classifier: ", round(score, 1), "%" )

# Confusion matrix. labels=[0, 1] forces the 2x2 layout [[TN, FP], [FN, TP]]
# even if one class is missing from y_test / y_pred; the annotation loop and the
# four-way unpacking below both rely on that shape.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
print(cm)

# Plot the confusion matrix and annotate every cell with its count
plt.clf()
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
classNames = ['Negative','Positive']
plt.title('Confusion Matrix')
plt.ylabel('Actual Class')
plt.xlabel('Predicted Class')
tick_marks = np.arange(len(classNames))
plt.xticks(tick_marks, classNames, rotation=45)
plt.yticks(tick_marks, classNames)
s = [['TN','FP'], ['FN', 'TP']]
for i in range(2):
    for j in range(2):
        plt.text(j,i, str(s[i][j])+" = "+str(cm[i][j]))
plt.show()

tn, fp, fn, tp = cm.ravel()
print("True Negatives: ",tn)
print("False Positives: ",fp)
print("False Negatives: ",fn)
print("True Positives: ",tp)


# Defined in this cell so the cell can be run on its own.
def fbeta(precision, recall, beta):
    """F-beta score from precision and recall given as fractions in [0, 1].

    beta > 1 weights recall more heavily, beta < 1 weights precision more
    heavily. Returns 0.0 when the denominator is zero (precision and recall
    both zero) instead of dividing by zero.
    """
    denominator = pow(beta, 2) * precision + recall
    if denominator == 0:
        return 0.0
    return ((1 + pow(beta, 2)) * precision * recall) / denominator


# All ratios are kept as fractions in [0, 1]; a zero denominator yields 0.0.
Accuracy = (tn+tp)*100/(tp+tn+fp+fn)
Precision = tp/(tp+fp) if (tp+fp) else 0.0
Recall = tp/(tp+fn) if (tp+fn) else 0.0
f1 = fbeta(Precision, Recall, 1)    # F1 is F-beta with beta = 1
f2 = fbeta(Precision, Recall, 2)
f0_5 = fbeta(Precision, Recall, 0.5)

print("Accuracy using Logistic Regression Classifier: ", round(Accuracy, 1), "%" )
# Scale the fractions by 100 when printing so that the trailing "%" is true;
# the original rounded the raw fraction and printed e.g. "0.8 %" for 80%.
for metric_name, metric_value in (("Precision", Precision), ("Recall", Recall),
                                  ("F1 score", f1), ("F2 score", f2),
                                  ("F0.5 score", f0_5)):
    print(metric_name + " using Logistic Regression Classifier: ", round(metric_value * 100, 1), "%" )





In [None]:
# Support vector machine (SVC) model

# Scale every feature into [-1, 1]. The scaler is fitted on the training split
# only and then applied to both splits.
# NOTE: X_train / X_test are overwritten with the scaled arrays, exactly as
# before, so any cell run after this one sees scaled features.
from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)

# SVM model with a linear kernel
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train,y_train)
y_pred = svclassifier.predict(X_test)
score = accuracy_score(y_test, y_pred) * 100
print("Accuracy using SVM Classifier: ", round(score, 1), "%" )

# Confusion matrix. labels=[0, 1] forces the 2x2 layout [[TN, FP], [FN, TP]]
# even if one class is missing from y_test / y_pred; the annotation loop and the
# four-way unpacking below both rely on that shape.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
np.set_printoptions(precision=2)
print(cm)

# Plot the confusion matrix and annotate every cell with its count
plt.clf()
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
classNames = ['Negative','Positive']
plt.title('Confusion Matrix')
plt.ylabel('Actual Class')
plt.xlabel('Predicted Class')
tick_marks = np.arange(len(classNames))
plt.xticks(tick_marks, classNames, rotation=45)
plt.yticks(tick_marks, classNames)
s = [['TN','FP'], ['FN', 'TP']]
for i in range(2):
    for j in range(2):
        plt.text(j,i, str(s[i][j])+" = "+str(cm[i][j]))
plt.show()

tn, fp, fn, tp = cm.ravel()
print("True Negatives: ",tn)
print("False Positives: ",fp)
print("False Negatives: ",fn)
print("True Positives: ",tp)


# Defined in this cell so the cell can be run on its own.
def fbeta(precision, recall, beta):
    """F-beta score from precision and recall given as fractions in [0, 1].

    beta > 1 weights recall more heavily, beta < 1 weights precision more
    heavily. Returns 0.0 when the denominator is zero (precision and recall
    both zero) instead of dividing by zero.
    """
    denominator = pow(beta, 2) * precision + recall
    if denominator == 0:
        return 0.0
    return ((1 + pow(beta, 2)) * precision * recall) / denominator


# All ratios are kept as fractions in [0, 1]; a zero denominator yields 0.0.
Accuracy = (tn+tp)*100/(tp+tn+fp+fn)
Precision = tp/(tp+fp) if (tp+fp) else 0.0
Recall = tp/(tp+fn) if (tp+fn) else 0.0
f1 = fbeta(Precision, Recall, 1)    # F1 is F-beta with beta = 1
f2 = fbeta(Precision, Recall, 2)
f0_5 = fbeta(Precision, Recall, 0.5)

print("Accuracy using  SVM Classifier: ", round(Accuracy, 1), "%" )
# Scale the fractions by 100 when printing so that the trailing "%" is true;
# the original rounded the raw fraction and printed e.g. "0.8 %" for 80%.
for metric_name, metric_value in (("Precision", Precision), ("Recall", Recall),
                                  ("F1 score", f1), ("F2 score", f2),
                                  ("F0.5 score", f0_5)):
    print(metric_name + " using  SVM Classifier: ", round(metric_value * 100, 1), "%" )






