# Machine Learning toolkit

#### Fundamentals

1. End-to-end example (more examples in the folder, with use cases for each algorithm)
2. Preprocessing data (more in data-preprocessing-toolkit)
3. Train/Test split

#### Machine Learning algorithms

1. Linear Regression
2. Naive Bayes
3. Gaussian Naive Bayes
4. KNN
5. K-Means
6. Linear SVC


#### Metrics:

1. Classification: Accuracy, Confusion Matrix, Classification Report, Recall, Precision, F1
2. Regression: MAE, MSE, R2
3. Clustering: Adjusted Rand Index, Homogeneity, V-measure
4. Cross-Validation


##### To be developed further.

In [1]:
# Example DataFrame: load the prepared tweets file (tab-separated, UTF-8).

import pandas as pd

df = pd.read_csv(
    'AI_Psycho_tweets_prepared.csv',
    sep='\t',
    encoding='utf-8',
)

In [None]:
# A basic end-to-end example: load iris, split, scale, fit KNN, score.

from sklearn import neighbors, datasets, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Keep only the first two feature columns of the iris data.
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target

# A fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Train a 5-nearest-neighbours classifier and label the held-out samples.
knn = neighbors.KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Last expression of the cell, so the notebook displays the accuracy.
accuracy_score(y_test, y_pred)

In [None]:
# Preprocessing data
# NOTE: this cell reuses X, y, X_train and X_test from the basic example cell.

# Missing Values
# `sklearn.preprocessing.Imputer` no longer exists in scikit-learn; its
# replacement `SimpleImputer` always imputes column by column, so the old
# `axis=0` argument is dropped. Here zeros are treated as "missing" and are
# replaced by the column mean.
from sklearn.impute import SimpleImputer
imp = SimpleImputer(missing_values=0, strategy='mean')
imp.fit_transform(X_train)

# Standardization
# Fit on the training split only, then apply the same transform to both splits.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_train)
standardized_X = scaler.transform(X_train)
standardized_X_test = scaler.transform(X_test)

# Normalization
# NOTE: `scaler` is rebound here and now refers to the Normalizer.
from sklearn.preprocessing import Normalizer
scaler = Normalizer().fit(X_train)
normalized_X = scaler.transform(X_train)
normalized_X_test = scaler.transform(X_test)

# Binarization
from sklearn.preprocessing import Binarizer
binarizer = Binarizer(threshold=0.0).fit(X)
binary_X = binarizer.transform(X)

# Encoding Categorical Features
# NOTE: overwrites `y` with its integer-encoded labels.
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
y = enc.fit_transform(y)

# Generating Polynomial Features
# Last expression of the cell, so the notebook displays the expanded matrix.
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(5)
poly.fit_transform(X)

In [None]:
# Train / Test split: 80% of the samples for training, 20% held out for testing.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    test_size=0.2,
)  # optionally pass random_state=0 for a reproducible split

In [None]:
# Machine Learning Algorithms: Supervised
# NOTE: `lr`, `knn` and `svc` are created in other cells (Linear Regression,
# KNN and SVM); run those cells first.

import numpy as np

# Model Fitting: supervised learning
lr.fit(X, y)
knn.fit(X_train, y_train)
svc.fit(X_train, y_train)

# Prediction: supervised learning
# The random samples must have as many feature columns as the data `svc`
# was fitted on, so the width is taken from X_train instead of hard-coded.
y_pred = svc.predict(np.random.random((2, X_train.shape[1])))  # Predict labels
y_pred = lr.predict(X_test)  # Predict labels
y_pred = knn.predict_proba(X_test)  # Predict label probabilities

In [None]:
# Linear Regression
from sklearn.linear_model import LinearRegression
# The `normalize` argument has been removed from LinearRegression in current
# scikit-learn releases; if feature scaling is wanted, apply a scaler
# (e.g. StandardScaler) to the data before fitting.
lr = LinearRegression()

# Linear Regression: fit / predict template
# (x_training_data, y_training_data and your_x_data are placeholders.)
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(x_training_data, y_training_data)
prediction = model.predict(your_x_data)


from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Cannot use a rank-1 array in scikit-learn: turn X into a single column.
# -1 lets NumPy infer the number of rows, so no separate length variable
# is needed.
X = X.reshape(-1, 1)
# Creating Model
reg = LinearRegression()
# Fitting training data
reg = reg.fit(X, Y)
# Y Prediction
Y_pred = reg.predict(X)

# Calculating R2 Score (on the same data the model was fitted on)
r2_score = reg.score(X, Y)

print(r2_score)

In [None]:
# Multinomial Naive Bayes: fit / predict template
# (x_training_data, y_training_data and your_x_data are placeholders.)
from sklearn.naive_bayes import MultinomialNB

# fit() returns the fitted estimator, so construct and train in one step.
model = MultinomialNB().fit(x_training_data, y_training_data)

# Hard class labels, then per-class probabilities, for the new samples.
predictions = model.predict(your_x_data)
probabilities = model.predict_proba(your_x_data)

In [None]:
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Train on the training split, then label the test split.
gnb = GaussianNB()
gnb.fit(data_train, target_train)
pred = gnb.predict(data_test)
# print(pred.tolist())  # uncomment to inspect the raw predictions

print("Naive-Bayes accuracy : ", accuracy_score(target_test, pred, normalize=True))

### Performance comparison
from yellowbrick.classifier import ClassificationReport

# Wrap the classifier in a Yellowbrick classification-report visualizer.
visualizer = ClassificationReport(gnb, classes=['Won', 'Loss'])

visualizer.fit(data_train, target_train)   # fit the visualizer on the training data
visualizer.score(data_test, target_test)   # evaluate the model on the test data
# NOTE(review): newer Yellowbrick releases call this method show() - confirm
# against the installed version.
g = visualizer.poof()                      # draw/show the report

In [None]:
# KNN: fit / predict template
# (x_training_data, y_training_data and your_x_data are placeholders.)
# The classifier lives in the `sklearn.neighbors` module.
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier()
model.fit(x_training_data, y_training_data)
predictions = model.predict(your_x_data)
probabilities = model.predict_proba(your_x_data)


# KNN with 3 neighbours, scored on the held-out split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(data_train, target_train)
pred = neigh.predict(data_test)
print("KNeighbors accuracy score : ", accuracy_score(target_test, pred))

from yellowbrick.classifier import ClassificationReport

# Instantiate the classification model and visualizer
visualizer = ClassificationReport(neigh, classes=['Won','Loss'])

visualizer.fit(data_train, target_train)  # Fit the training data to the visualizer
visualizer.score(data_test, target_test)  # Evaluate the model on the test data
g = visualizer.poof()             # Draw/show/poof the data

# KNN: the `knn` estimator used by the supervised-learning cell.
from sklearn import neighbors
knn = neighbors.KNeighborsClassifier(n_neighbors=5)


In [None]:
# Linear SVC
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

# Train on the training split, then label the test split.
svc_model = LinearSVC(random_state=0)
svc_model.fit(data_train, target_train)
pred = svc_model.predict(data_test)

print("LinearSVC accuracy : ", accuracy_score(target_test, pred, normalize=True))

from yellowbrick.classifier import ClassificationReport

# Wrap the classifier in a Yellowbrick classification-report visualizer.
visualizer = ClassificationReport(svc_model, classes=['Won', 'Loss'])

visualizer.fit(data_train, target_train)   # fit the visualizer on the training data
visualizer.score(data_test, target_test)   # evaluate the model on the test data
# NOTE(review): newer Yellowbrick releases call this method show() - confirm
# against the installed version.
g = visualizer.poof()                      # draw/show the report

In [None]:
## Support Vector Machines (SVM)
# Support vector classifier with a linear kernel. It is bound to `svc`,
# the name the supervised-learning cell fits and predicts with.
from sklearn.svm import SVC
svc = SVC(kernel='linear')

In [None]:
# Machine Learning Algorithms: Unsupervised
# NOTE: `k_means` and `pca` are created in the K Means and PCA cells; run
# those cells first.

# Model Fitting: unsupervised learning
k_means.fit(X_train)
# fit_transform returns the transformed data rather than the estimator.
pca_model = pca.fit_transform(X_train)

# Predictions: unsupervised learning
y_pred = k_means.predict(X_test)

In [None]:
# Principal Component Analysis (PCA)
# Per the scikit-learn docs, a fractional n_components asks PCA to keep as
# many components as are needed to explain that share of the variance (95%).
# Bound to `pca`, the name the unsupervised-learning cell calls
# fit_transform on.
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)

In [None]:
# K Means: the `k_means` estimator used by the unsupervised-learning cell.
from sklearn.cluster import KMeans
k_means = KMeans(random_state=0, n_clusters=3)

# K Means: fit / predict template
# (x_training_data and your_x_data are placeholders.)
from sklearn.cluster import KMeans

# fit() returns the fitted estimator, so construct and train in one step.
model = KMeans(init='random', n_clusters=4).fit(x_training_data)
predictions = model.predict(your_x_data)