# Regression Algorithms for Muon Data

We will first check some linear regression algorithms, scored with the negative mean squared error.

### Linear Regression

The ML algorithms used for linear regression are: ordinary linear regression, Lasso, Ridge and ElasticNet.

In [None]:
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

import matplotlib.pyplot as plt
import os

The first option is to divide the data set into two different datasets: one for training and one for testing.

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Load the dataset and hold out 20% of the rows for testing.
# The last CSV column is used as the regression target; every other column
# is used as an input feature.
filename = '../MuonPOGAnalysisTemplate/output/bxcut_org.csv'
dataframe = read_csv(filename)
array = dataframe.values
X = array[:, :-1]
Y = array[:, -1]
# A fixed seed keeps the train/test split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=7)

In [None]:
# Fit a linear regression on the training split and predict the held-out
# targets.
lm = LinearRegression()
model = lm.fit(X_train, y_train)
predictions = lm.predict(X_test)

# Scatter the predictions against the true values. The dashed diagonal spans
# the full range of Y; a point lying on it is a perfect prediction.
target_range = [Y.min(), Y.max()]
fig, ax = plt.subplots()
ax.scatter(y_test, predictions)
ax.plot(target_range, target_range, 'k--', lw=4)
ax.set(xlabel="True Values", ylabel="Predictions")
plt.show()
del ax

The other option is to use K-fold cross-validation.

In [None]:
# Load the dataset: the last CSV column is the regression target and every
# other column is an input feature.
filename = '../MuonPOGAnalysisTemplate/output/bxcut_org.csv'
dataframe = read_csv(filename)
array = dataframe.values
X = array[:, :-1]
Y = array[:, -1]

# Candidate linear models, each paired with the short label used in the
# printed summary and on the comparison plot.
models = [
    ('LR', LinearRegression()),
    ('LAR', Lasso()),
    ('RIR', Ridge()),
    ('EN', ElasticNet()),
]

# One shared splitter so every model is evaluated on the same folds.
# shuffle=True is required for random_state to have any effect; recent
# scikit-learn releases raise a ValueError when a seed is passed without it.
kfold = KFold(n_splits=15, shuffle=True, random_state=7)
# Scores are negated mean squared errors, so values closer to zero are better.
scoring = 'neg_mean_squared_error'

# Evaluate each model in turn.
results = []
names = []
for name, model in models:
    cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

    # Cross-validated predictions plotted against the measured targets; the
    # dashed diagonal marks where prediction equals measurement.
    predicted = cross_val_predict(model, X, Y, cv=kfold)
    fig, ax = plt.subplots()
    ax.scatter(Y, predicted, edgecolors=(0, 0, 0))
    ax.plot([Y.min(), Y.max()], [Y.min(), Y.max()], 'k--', lw=4)
    ax.set_title(name)
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    plt.show()
    del ax

# Box plot comparing the per-fold scores of all models.
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
ax.boxplot(results)
ax.set_xticklabels(names)
plt.show()


### Non-linear Regression

The non-linear regression algorithms are: KNeighborsRegressor, DecisionTreeRegressor and SVR.

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

In [None]:
# Load the dataset: the last CSV column is the regression target and every
# other column is an input feature.
filename = '../MuonPOGAnalysisTemplate/output/bxcut_org.csv'
dataframe = read_csv(filename)
array = dataframe.values
X = array[:, :-1]
Y = array[:, -1]

# Candidate non-linear models, each paired with the short label used in the
# printed summary and on the comparison plot.
models = [
    ('KNR', KNeighborsRegressor()),
    ('DTR', DecisionTreeRegressor()),
    ('SVR', SVR()),
]

# One shared splitter so every model is evaluated on the same folds.
# shuffle=True is required for random_state to have any effect; recent
# scikit-learn releases raise a ValueError when a seed is passed without it.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
# Scores are negated mean squared errors, so values closer to zero are better.
scoring = 'neg_mean_squared_error'

# Evaluate each model in turn.
results = []
names = []
for name, model in models:
    cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

    # Cross-validated predictions plotted against the measured targets; the
    # dashed diagonal marks where prediction equals measurement.
    predicted = cross_val_predict(model, X, Y, cv=kfold)
    fig, ax = plt.subplots()
    ax.scatter(Y, predicted, edgecolors=(0, 0, 0))
    ax.plot([Y.min(), Y.max()], [Y.min(), Y.max()], 'k--', lw=4)
    ax.set_title(name)
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    plt.show()
    del ax

# Box plot comparing the per-fold scores of all models.
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
ax.boxplot(results)
ax.set_xticklabels(names)
plt.show()

