In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import OneHotEncoder, LabelBinarizer, LabelEncoder

In [11]:
data = pd.read_csv("Final_Data.csv")
data.head(2)

In [12]:
data.drop(columns=["Company_Name", "Unnamed: 0", "Unnamed: 0.1"], inplace=True)

In [13]:
data.shape

* __For Company_Type__ 
    * __0__ - Startup
    * __1__ - Non-Startup 
* __For Company_Name__ 
    * __0__ - No respose from client
    * __1__ - Client responded 
   

In [15]:
ax = sb.countplot(x="Contact Status", data=data,  hue="Company_Type")

In [16]:
plt.figure(figsize=(10,10))
sb.heatmap(data.corr())

# Seperating Feature and label

In [108]:
X = pd.DataFrame(data.drop(columns="Contact Status"))
y = pd.DataFrame(data["Contact Status"])

# Divide the data into 1:3 ratio

In [110]:
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Apply Random Forest Algorithm on the data

In [14]:
# Fitting Random Forest Classification to the Training set
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 128, criterion = 'gini', random_state = 55)
classifier.fit(X_train, y_train)

In [113]:
# Predicting the Test set results
y_pred = classifier.predict(X_test)

In [114]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1444,   30],
       [ 160,    2]])

In [115]:
from sklearn.metrics import *
f1_score(y_test, y_pred)  

0.020618556701030927

In [116]:
accuracy_score(y_test, y_pred)

0.8838630806845966

# Apply SVM Algorithm on the data

In [117]:
# Fitting SVC Classification to the Training set
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 42)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1471,    3],
       [ 162,    0]])

In [118]:
accuracy_score(y_test, y_pred)

0.8991442542787286

In [119]:
from sklearn.metrics import *
f1_score(y_test, y_pred)  

0.0

# Logistic regression

In [120]:
from sklearn.linear_model import LogisticRegression
classify = LogisticRegression(random_state=45)
classify.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1471,    3],
       [ 162,    0]])

In [121]:
accuracy_score(y_test, y_pred)

0.8991442542787286

In [122]:
from sklearn.metrics import *
f1_score(y_test, y_pred)  

0.0

# Decision Tree Classification

In [123]:
# Fitting Decision Tree Classification to the Training set
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1440,   34],
       [ 160,    2]])

In [124]:
from sklearn.metrics import *
f1_score(y_test, y_pred)

0.0202020202020202

In [125]:
accuracy_score(y_test, y_pred)

0.8814180929095354

# KNN

In [126]:
from sklearn.neighbors import KNeighborsClassifier
classify = KNeighborsClassifier(n_neighbors = 3, metric = "minkowski", p = 2)
classify.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1440,   34],
       [ 160,    2]])

In [127]:
accuracy_score(y_test, y_pred)

0.8814180929095354

In [128]:
from sklearn.metrics import *
f1_score(y_test, y_pred), accuracy_score(y_test, y_pred)

(0.0202020202020202, 0.8814180929095354)

# Decision Tree Using Bagging

In [131]:
from sklearn.ensemble import BaggingClassifier
from sklearn import tree
model = BaggingClassifier(tree.DecisionTreeClassifier(random_state=1))
model.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm, model.score(X_test,y_test)

(array([[1440,   34],
        [ 160,    2]]), 0.8850855745721271)

# AdaBoost Classifier

In [132]:
from sklearn.ensemble import AdaBoostClassifier
model = AdaBoostClassifier(random_state=1)
model.fit(X_train, y_train)
model.score(X_test,y_test)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm, model.score(X_test,y_test)

(array([[1440,   34],
        [ 160,    2]]), 0.8991442542787286)

# Gradient Boosting Classifier

In [133]:
from sklearn.ensemble import GradientBoostingClassifier
model= GradientBoostingClassifier(learning_rate=0.01,random_state=1)
model.fit(X_train, y_train)
model.score(X_test,y_test)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm, model.score(X_test,y_test)


(array([[1440,   34],
        [ 160,    2]]), 0.9009779951100244)