# Data collection 

In [None]:
import pandas as pd 
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.techindicators import TechIndicators
import matplotlib.pyplot as plt
from time import sleep, time
import numpy as np

In [None]:
def get_indicators(stock):
    """Download daily prices and technical indicators for one ticker.

    Requests the daily adjusted time series of ``stock`` from Alpha Vantage,
    together with its Bollinger bands, SMA, EMA, MACD, stochastic oscillator
    and RSI, and inner-joins all of them on their index.

    Parameters
    ----------
    stock : str
        Ticker symbol used for every request (e.g. ``"EN.PA"``).

    Returns
    -------
    pandas.DataFrame
        One row per index value present in all seven responses. The price
        columns are renamed to "Open", "High", "Low", "Close", "Adj Close",
        "Vol", "Div amt" and "Split".
    """
    import os

    # SECURITY: an API key should not be committed with the notebook. The
    # ALPHAVANTAGE_API_KEY environment variable takes precedence; the literal
    # is kept only as a fallback so existing runs keep working.
    api_key = os.environ.get('ALPHAVANTAGE_API_KEY', 'BHE3BCBJB782W3I3')
    ts = TimeSeries(key=api_key, output_format='pandas', indexing_type='date')
    ti = TechIndicators(key=api_key, output_format='pandas', indexing_type='date')

    indicators, _ = ts.get_daily_adjusted(stock, outputsize='full')
    # Every indicator is requested for `stock`. The symbol used to be
    # hard-coded to 'EN.PA', so any other ticker got Bouygues' indicators
    # joined onto its own prices.
    bbands, _ = ti.get_bbands(symbol=stock, interval='daily')
    sma, _ = ti.get_sma(symbol=stock, interval='daily')
    ema, _ = ti.get_ema(symbol=stock, interval='daily')
    macd, _ = ti.get_macd(symbol=stock, interval='daily')
    # Pause for a minute after the first five requests -- presumably to stay
    # under the API's per-minute request limit (TODO confirm the quota).
    sleep(60)
    stoch, _ = ti.get_stoch(symbol=stock, interval='daily')
    rsi, _ = ti.get_rsi(symbol=stock, interval='daily')

    # join='inner' keeps only the index values common to every response.
    concat = pd.concat([indicators, bbands, sma, ema, macd, stoch, rsi], axis=1, join='inner')
    concat = concat.rename(columns={"1. open": "Open", "2. high": "High", "3. low": "Low", "4. close": "Close",
                                    "5. adjusted close": "Adj Close", "6. volume": "Vol", "7. dividend amount": "Div amt",
                                    "8. split coefficient": "Split"})
    return concat

In [None]:
data=get_indicators("EN.PA")

In [None]:
data.to_csv("Bouygues untreated.csv")

# Data preparation

## Adding values

In [None]:
df=data.copy()

In [None]:
# Copy the open/close of the row directly below onto each row (shift(-1)),
# then express that row's open-to-close move as a percentage, to 2 decimals.
df["Prev_open"] = df["Open"].shift(-1)
df["Prev_close"] = df["Close"].shift(-1)
df["Percent_prev"] = (
    (df["Prev_close"] - df["Prev_open"]) / df["Prev_open"] * 100
).round(2)

In [None]:
# Two classes from right-closed intervals: moves in (-1000, 0.1] are
# labelled 0 and moves in (0.1, 1000] are labelled 1.
bins = [-1000, 0.1, 1000]
group_names = [0, 1]
df['Target'] = pd.cut(x=df['Percent_prev'], bins=bins, labels=group_names, right=True)
df.head()

## Splitting the dataset

In [None]:
# Remove every row that has a missing value, then split into features and
# label. The last four columns (Prev_open, Prev_close, Percent_prev, Target)
# were appended for labelling and are kept out of X.
df = df.dropna(axis=0, how='any')
X = np.array(df[df.columns[:-4]])
y = np.array(df.loc[:, 'Target'])

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Min-max scale every feature column; fit_transform returns the scaled
# values as an array.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# done in the following cell, so test rows influence the scaling -- consider
# fitting on the training rows only.
min_max_scaler = MinMaxScaler()
X_normal = min_max_scaler.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test= train_test_split(X_normal,y, test_size=0.25, random_state = 42)

# Classifiers

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, SVR
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.linear_model import LinearRegression

In [None]:
# Models to compare, written as (label, estimator) pairs so each label sits
# next to the estimator it describes. `names` and `classifiers` are then
# derived from the pairs, in the same order.
_labelled_models = [
    ("Nearest Neighbors", KNeighborsClassifier(3)),
    ("Linear SVM", SVC(kernel="linear", C=0.025)),
    ("SVC high C", SVC(kernel="linear", C=1000)),
    ("RBF SVM", SVC(gamma=2, C=1)),
    # Disabled: GaussianProcessClassifier(1.0 * RBF(1.0))
    ("Decision Tree", DecisionTreeClassifier(max_depth=5)),
    ("Random Forest", RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
    ("Neural Net", MLPClassifier(alpha=1, max_iter=1000)),
    ("AdaBoost", AdaBoostClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("QDA", QuadraticDiscriminantAnalysis()),
]

names = [label for label, _ in _labelled_models]
classifiers = [model for _, model in _labelled_models]

assert len(names)==len(classifiers)

In [None]:
def prediction_classifier(name, clf):
    """Fit ``clf`` on the notebook's training split and evaluate it on the test split.

    ``name`` is accepted but not used. Reads the globals ``X_train``,
    ``y_train``, ``X_test`` and ``y_test``.

    Returns a three-item list: the value of ``clf.score(X_test, y_test)``,
    the elapsed seconds (fit + score + predict) rounded to one decimal, and
    the predictions for ``X_test``.
    """
    started = time()
    clf.fit(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    predictions = clf.predict(X_test)
    elapsed = round(time() - started, 1)
    return [test_score, elapsed, predictions]

In [None]:
# Train each model in turn and store, one row per model, its test score, the
# wall-clock seconds spent on fit + score + predict, and its predictions.
results = pd.DataFrame(columns=['Score', 'Time', 'Results'])
for name, clf in zip(names, classifiers):
    started = time()
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    y_pred = clf.predict(X_test)
    timer = time() - started
    results.loc[name] = [score, timer, y_pred]
    print(name, " done")

In [None]:
results

# What about three output classes?

In [None]:
import pandas as pd
import numpy as np
data = pd.read_csv("Bouygues untreated.csv", index_col="date") 
data= data.sort_index(axis = 0, ascending=False)

In [None]:
data.head()

In [None]:
df=data.copy()

In [None]:
# The frame is sorted newest-first (see the read_csv cell), so shift(1)
# copies each value from the row above, i.e. the FOLLOWING trading day.
# The label therefore describes the next day's open-to-close move, which is
# what the features of the current row should predict. The former shift(-1)
# read the previous, already known, day instead. The "Prev_" column names
# are kept because other cells refer to them.
df["Prev_open"]=df["Open"].shift(periods=1)
df["Prev_close"]=df["Close"].shift(periods=1)
df["Percent_prev"]=round((df["Prev_close"]-df["Prev_open"])/df["Prev_open"]*100,2)

In [None]:
# Three classes from right-closed intervals: (-1000, 0.1] -> 0,
# (0.1, 1] -> 1 and (1, 1000] -> 2.
bins = [-1000, 0.1, 1, 1000]
group_names = [0, 1, 2]
df['Target'] = pd.cut(x=df['Percent_prev'], bins=bins, labels=group_names, right=True)
df.head()

In [None]:
def drop_column(column, dataset):
    """Return ``dataset`` without ``column``; a missing column is not an error.

    Parameters
    ----------
    column : label
        Name of the column to remove.
    dataset : pandas.DataFrame
        Frame to remove it from. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A frame without ``column`` (all columns kept when it is absent).
    """
    # errors='ignore' skips labels that do not exist. This replaces a bare
    # `except:` that silently hid every exception, not only the KeyError
    # raised for a missing column.
    return dataset.drop(columns=[column], errors='ignore')

In [None]:
df=drop_column('Div amt', df)
df=drop_column('Split', df)

In [None]:
# Intraday high, low and close as a percentage distance from the open, and
# volume as a multiple of the median volume of the whole frame.
df['High_var'] = df['High'].sub(df['Open']).div(df['Open']).mul(100)
df['Low_var'] = df['Low'].sub(df['Open']).div(df['Open']).mul(100)
df['Close_var'] = df['Close'].sub(df['Open']).div(df['Open']).mul(100)
df['Vol_pct'] = df['Vol'].div(df['Vol'].median())

In [None]:
df.head()

In [None]:
# Moving averages of the open over 15 and 50 rows, stored as a percentage
# distance from the current open.
# The frame is sorted newest-first, so rolling over it directly would average
# each day with the 14/49 FOLLOWING days (look-ahead). The windows are
# therefore computed on a chronologically sorted copy of the column and
# re-aligned to the frame's own row order (assumes unique index values).
open_chrono = df['Open'].sort_index(ascending=True)
rolling_15 = open_chrono.rolling(window=15).mean().reindex(df.index)
rolling_50 = open_chrono.rolling(window=50).mean().reindex(df.index)
df['RollingMean15']=(rolling_15-df['Open'])/df['Open']*100
df['RollingMean50']=(rolling_50-df['Open'])/df['Open']*100

In [None]:
df

In [None]:
df["Target"].value_counts()

In [None]:
df=df.dropna()
# Select the features by name. The former positional slice iloc[:, :-4]
# assumed the label columns were the last four, but six engineered columns
# were appended after 'Target'. It therefore fed Target, Percent_prev,
# Prev_open and Prev_close into X (label leakage) and dropped Close_var,
# Vol_pct and both rolling means.
label_columns = ['Prev_open', 'Prev_close', 'Percent_prev', 'Target']
X=np.array(df.drop(columns=label_columns))
y=np.array(df['Target'])

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Min-max scale every feature column; fit_transform returns the scaled
# values as an array.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# done in the following cell, so test rows influence the scaling -- consider
# fitting on the training rows only.
min_max_scaler = MinMaxScaler()
X_normal = min_max_scaler.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test= train_test_split(X_normal,y, test_size=0.25, random_state = 42)

In [None]:
from time import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, SVR
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report

In [None]:
#QDA removed

# Models to compare, written as (label, estimator) pairs so each label sits
# next to the estimator it describes. `names` and `classifiers` are then
# derived from the pairs, in the same order.
_labelled_models = [
    ("Nearest Neighbors", KNeighborsClassifier(3)),
    ("Linear SVM", SVC(kernel="linear", C=0.025)),
    ("SVC high C", SVC(kernel="linear", C=1000)),
    ("RBF SVM", SVC(gamma=2, C=1)),
    # Disabled: GaussianProcessClassifier(1.0 * RBF(1.0))
    ("Decision Tree", DecisionTreeClassifier(max_depth=5)),
    ("Random Forest", RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
    ("Neural Net", MLPClassifier(alpha=1, max_iter=1000)),
    ("AdaBoost", AdaBoostClassifier()),
    ("Naive Bayes", GaussianNB()),
]

names = [label for label, _ in _labelled_models]
classifiers = [model for _, model in _labelled_models]

assert len(names)==len(classifiers)

In [None]:
def prediction_classifier(name, clf):
    """Fit ``clf`` on the notebook's training split and evaluate it on the test split.

    ``name`` is accepted but not used. Reads the globals ``X_train``,
    ``y_train``, ``X_test`` and ``y_test``.

    Returns a three-item list: the value of ``clf.score(X_test, y_test)``,
    the elapsed seconds (fit + score + predict) rounded to one decimal, and
    the predictions for ``X_test``.
    """
    started = time()
    clf.fit(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    predictions = clf.predict(X_test)
    elapsed = round(time() - started, 1)
    return [test_score, elapsed, predictions]

In [None]:
# Train each model in turn and store, one row per model, its test score, the
# wall-clock seconds spent on fit + score + predict, its predictions and its
# classification report.
results=pd.DataFrame(columns=['Score', 'Time', 'Results', 'Report'])
for (name, classifier) in zip(names, classifiers):
    timer=time()
    clf=classifier
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    y_pred = clf.predict(X_test)
    timer=time()-timer
    # Score against y_test directly. `y_true` is only assigned by later
    # cells, so on a fresh kernel this line raised NameError (or compared
    # against a stale array left over from a previous run).
    report=classification_report(y_test, y_pred)
    results.loc[name] = [score, timer, y_pred, report]
    print(name, " done")

In [None]:
results.loc["TEST"]=[0,0,y_test,""]
results

In [None]:
print(results.iat[2,3])

In [None]:
#SVC fine tuning

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# One sub-grid per kernel: RBF varies gamma, poly varies the degree, and all
# three vary the penalty C.
tuned_parameters = [
    dict(kernel=['rbf'], gamma=[1e-3, 1e-4], C=[1, 10, 100, 1000]),
    dict(kernel=['linear'], C=[1, 10, 100, 1000]),
    dict(kernel=['poly'], degree=[1, 2, 3, 5], C=[1, 10, 100, 1000]),
]

clf = GridSearchCV(estimator=SVC(), param_grid=tuned_parameters)
clf.fit(X_train, y_train)

print("Best parameters set found on development set:")
print(clf.best_params_)
print()
# Evaluate the search's predict() on the held-out test split.
y_true = y_test
y_pred = clf.predict(X_test)
print(classification_report(y_true, y_pred))
print()

In [None]:
prediction_classifier("name", QDA(priors=None, reg_param=0.9))

In [None]:
# Fit one larger random forest and report its per-class metrics.
# Start the clock here: the cell used to compute `time()-timer` with a
# `timer` left over from an earlier cell (a duration, not a start time), so
# the resulting value was meaningless.
timer=time()
clf=RandomForestClassifier(max_depth=10, n_estimators=100, max_features=10)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
y_pred = clf.predict(X_test)
timer=time()-timer
# Compare against y_test rather than a `y_true` set by a different cell.
print(classification_report(y_test, y_pred))

In [None]:
#Random forest fine tuning

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Exhaustive grid over the main random-forest hyper-parameters.
# 'max_features' used to list both 'auto' and 'sqrt'. For
# RandomForestClassifier 'auto' meant sqrt(n_features), i.e. the same
# setting as 'sqrt' (and it is no longer accepted by recent scikit-learn
# releases), so listing both only doubled the number of candidates
# (3960 -> 1980) without widening the search.
tuned_parameters = [{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
 'max_features': ['sqrt'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}]

clf = GridSearchCV(RandomForestClassifier(), tuned_parameters)
clf.fit(X_train, y_train)

print("Best parameters set found on development set:")
print(clf.best_params_)
print()
# Evaluate the search's predict() on the held-out test split.
y_true, y_pred = y_test, clf.predict(X_test)
print(classification_report(y_true, y_pred))
print()

In [None]:
params_RandomForest= clf.best_params_

In [None]:
# It is important that the recall for class 0 be as close to 1 as possible: it keeps us from buying stocks that will go down.

# Changing values

In [3]:
import pandas as pd
data = pd.read_csv("./StockHistory/History_stock_AC.PA.csv", index_col="date") 
data= data.sort_index(axis = 0, ascending=False)

In [4]:
df=data.copy()

In [5]:
def drop_column(column, dataset):
    """Return ``dataset`` without ``column``; a missing column is not an error.

    Parameters
    ----------
    column : label
        Name of the column to remove.
    dataset : pandas.DataFrame
        Frame to remove it from. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A frame without ``column`` (all columns kept when it is absent).
    """
    # errors='ignore' skips labels that do not exist. This replaces a bare
    # `except:` that silently hid every exception, not only the KeyError
    # raised for a missing column.
    return dataset.drop(columns=[column], errors='ignore')

In [6]:
df=drop_column('Div amt', df)
df=drop_column('Split', df)
df=drop_column('Adj Close', df)

In [7]:
# Intraday high, low and close as a percentage distance from the open, and
# volume as a multiple of the median volume of the whole frame.
df['High_var'] = df['High'].sub(df['Open']).div(df['Open']).mul(100)
df['Low_var'] = df['Low'].sub(df['Open']).div(df['Open']).mul(100)
df['Close_var'] = df['Close'].sub(df['Open']).div(df['Open']).mul(100)
df['Vol_pct'] = df['Vol'].div(df['Vol'].median())

In [8]:
# Moving averages of the open over 15 and 50 rows, plus their percentage
# distance from the current open. A rolling window covers the current row
# and the rows before it, so the frame is put in ascending index order for
# the computation and flipped back to newest-first afterwards.
df = df.sort_index(ascending=True)
df['RM15'] = df['Open'].rolling(15).mean()
df['RM50'] = df['Open'].rolling(50).mean()
df['RM15_pct'] = df['RM15'].sub(df['Open']).div(df['Open']).mul(100)
df['RM50_pct'] = df['RM50'].sub(df['Open']).div(df['Open']).mul(100)
df = df.sort_index(ascending=False)

In [9]:
# SMA and EMA as a percentage distance from the open.
df['SMA_pct'] = df['SMA'].sub(df['Open']).div(df['Open']).mul(100)
df['EMA_pct'] = df['EMA'].sub(df['Open']).div(df['Open']).mul(100)

In [10]:
# Bollinger bands as a percentage distance from the open.
df['Bband_mid_pct'] = df['Real Middle Band'].sub(df['Open']).div(df['Open']).mul(100)
df['Bband_low_pct'] = df['Real Lower Band'].sub(df['Open']).div(df['Open']).mul(100)
df['Bband_up_pct'] = df['Real Upper Band'].sub(df['Open']).div(df['Open']).mul(100)
# Distance of the open below the upper band, as a fraction of the band width.
df['Bband_pos'] = (df['Real Upper Band'] - df['Open']).div(df['Real Upper Band'] - df['Real Lower Band'])
# Oscillators divided by 100. RSI and ADX are overwritten in place, so
# running this cell a second time divides them again.
df['RSI'] = df['RSI'].div(100)
df['ADX'] = df['ADX'].div(100)
df['StochK'] = df['SlowK'].div(100)
df['StochD'] = df['SlowD'].div(100)


In [11]:
#adding prevs
# Despite the "Prev_" names, these columns hold the FOLLOWING day's values:
# the frame is sorted newest-first and shift(1) copies each value from the
# row above. Percent_prev is that day's open-to-close move in percent,
# rounded to 2 decimals.
df["Prev_open"] = df["Open"].shift(1)
df["Prev_close"] = df["Close"].shift(1)
df["Percent_prev"] = (
    (df["Prev_close"] - df["Prev_open"]) / df["Prev_open"] * 100
).round(2)

In [12]:
# Independent copy holding only the engineered features, followed by
# Percent_prev (the value the label is later derived from).
df_to_compute = df.loc[:, [
    'High_var', 'Low_var', 'Close_var', 'Vol_pct',
    'RM15_pct', 'RM50_pct', 'SMA_pct', 'EMA_pct',
    'Bband_mid_pct', 'Bband_low_pct', 'Bband_up_pct', 'Bband_pos',
    'StochK', 'StochD',
    'MACD_Signal', 'MACD_Hist', 'MACD',
    'RSI', 'ADX',
    'Percent_prev',
]].copy()

In [13]:
df_to_compute.head()

Unnamed: 0_level_0,High_var,Low_var,Close_var,Vol_pct,RM15_pct,RM50_pct,SMA_pct,EMA_pct,Bband_mid_pct,Bband_low_pct,Bband_up_pct,Bband_pos,StochK,StochD,MACD_Signal,MACD_Hist,MACD,RSI,ADX,Percent_prev
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-01-06,0.220805,-2.232581,-1.251227,1.120634,2.011776,-2.623651,0.47105,0.433759,0.47105,-5.459519,6.401619,0.539714,0.115174,0.133878,0.706,-0.1411,0.5649,0.498341,0.173946,
2020-01-03,0.096015,-1.584253,-1.080173,0.723468,-0.43527,-4.847816,-1.893903,-1.56241,-1.893903,-8.151704,4.363898,0.348677,0.059386,0.273136,0.7412,-0.0285,0.7127,0.567402,0.182951,-1.25
2020-01-02,0.2849,-0.854701,-0.854701,0.5214,-1.976891,-6.069326,-3.355888,-2.687559,-3.355888,-10.239791,3.528015,0.256251,0.227074,0.535469,0.7484,0.045,0.7933,0.613691,0.183419,-1.08
2019-12-31,0.645007,-0.262781,-0.262781,0.20312,-1.914318,-5.636885,-3.23698,-2.277353,-3.23698,-10.71333,4.239369,0.283519,0.53295,0.768917,0.7371,0.0916,0.8287,0.613146,0.176493,-0.85
2019-12-30,0.285307,-0.689491,-0.21398,0.628205,-2.870502,-6.268664,-4.122682,-2.953162,-4.122682,-11.807418,3.562054,0.231762,0.846383,0.871556,0.7142,0.1485,0.8627,0.631779,0.169808,-0.26


In [14]:
df_to_compute=df_to_compute.dropna()

In [15]:
# plt.matshow(df_to_compute.corr())
# plt.show()

In [16]:
# ### import matplotlib.pyplot as plt
# from matplotlib import style

# # Adjusting the size of matplotlib
# import matplotlib as mpl
# mpl.rc('figure', figsize=(8, 7))
# mpl.__version__

# # Adjusting the style of matplotlib
# style.use('ggplot')

# df["Open"].plot(label='Open')
# df["RM50"].plot(label='RM50')
# plt.legend()

In [17]:
# import seaborn as sns
# sns.pairplot(df_to_compute)
# sns.plt.show()

In [40]:
# Two classes from right-closed intervals: moves in (-1000, 0.1] are
# labelled 0 and moves in (0.1, 1000] are labelled 1.
bins = [-1000, 0.1, 1000]
group_names = [0, 1]
df_to_compute['Target'] = pd.cut(x=df_to_compute['Percent_prev'], bins=bins, labels=group_names, right=True)
df_to_compute.head(20)

Unnamed: 0_level_0,High_var,Low_var,Close_var,Vol_pct,RM15_pct,RM50_pct,SMA_pct,EMA_pct,Bband_mid_pct,Bband_low_pct,...,Bband_pos,StochK,StochD,MACD_Signal,MACD_Hist,MACD,RSI,ADX,Percent_prev,Target
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-03,0.096015,-1.584253,-1.080173,0.723468,-0.43527,-4.847816,-1.893903,-1.56241,-1.893903,-8.151704,...,0.348677,0.059386,0.273136,0.7412,-0.0285,0.7127,0.567402,0.182951,-1.25,0
2020-01-02,0.2849,-0.854701,-0.854701,0.5214,-1.976891,-6.069326,-3.355888,-2.687559,-3.355888,-10.239791,...,0.256251,0.227074,0.535469,0.7484,0.045,0.7933,0.613691,0.183419,-1.08,0
2019-12-31,0.645007,-0.262781,-0.262781,0.20312,-1.914318,-5.636885,-3.23698,-2.277353,-3.23698,-10.71333,...,0.283519,0.53295,0.768917,0.7371,0.0916,0.8287,0.613146,0.176493,-0.85,0
2019-12-30,0.285307,-0.689491,-0.21398,0.628205,-2.870502,-6.268664,-4.122682,-2.953162,-4.122682,-11.807418,...,0.231762,0.846383,0.871556,0.7142,0.1485,0.8627,0.631779,0.169808,-0.26,0
2019-12-27,0.570478,-0.546708,0.332779,0.44624,-3.410189,-6.480628,-4.518659,-3.264321,-4.518659,-12.130734,...,0.203191,0.927419,0.788438,0.6771,0.1942,0.8713,0.652323,0.162571,-0.21,0
2019-12-25,0.0,0.0,0.0,0.0,-3.917182,-6.574964,-4.765588,-3.52832,-4.765588,-12.110662,...,0.175593,0.840866,0.631518,0.6285,0.2165,0.8451,0.643607,0.154282,0.33,1
2019-12-24,0.621415,-0.047801,0.43021,0.102756,-4.196941,-6.389101,-4.666587,-3.486377,-4.666587,-11.727533,...,0.169549,0.59703,0.500562,0.5744,0.2443,0.8187,0.643607,0.146986,0.0,0
2019-12-23,1.840194,0.0,1.670702,0.759695,-3.491525,-5.333172,-3.715496,-2.642131,-3.715496,-10.468281,...,0.224892,0.456657,0.529395,0.5133,0.2598,0.7731,0.642329,0.139307,0.43,1
2019-12-20,0.532688,-1.307506,-0.387409,1.526552,-3.898305,-5.48862,-4.032688,-3.096126,-4.032688,-10.323729,...,0.17949,0.447999,0.661577,0.4484,0.2583,0.7067,0.604127,0.131409,1.67,1
2019-12-19,0.145068,-1.329787,-0.024178,1.463934,-4.392328,-5.803191,-4.446325,-3.521277,-4.446325,-10.545213,...,0.135481,0.68353,0.829311,0.3838,0.3145,0.6983,0.619661,0.125658,-0.39,0


In [41]:
df=df_to_compute.copy()
df["Target"].value_counts()

0    2713
1    2335
Name: Target, dtype: int64

In [42]:
import numpy as np
# Remove every row that has a missing value, then split into features and
# label. The last two columns (Percent_prev, Target) are kept out of X.
df = df.dropna(axis=0, how='any')
X = np.array(df[df.columns[:-2]])
y = np.array(df.loc[:, 'Target'])

In [43]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Min-max scale every feature column; fit_transform returns the scaled
# values as an array.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# done in the following cell, so test rows influence the scaling -- consider
# fitting on the training rows only.
min_max_scaler = MinMaxScaler()
X_normal = min_max_scaler.fit_transform(X)

In [44]:
X_train, X_test, y_train, y_test= train_test_split(X_normal,y, test_size=0.25, random_state = 42)

In [45]:
from time import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, SVR
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report

In [46]:
#QDA removed

# Models to compare, written as (label, estimator) pairs so each label sits
# next to the estimator it describes. `names` and `classifiers` are then
# derived from the pairs, in the same order.
_labelled_models = [
    ("Nearest Neighbors", KNeighborsClassifier(3)),
    ("Linear SVM", SVC(kernel="linear", C=0.025)),
    ("SVC high C", SVC(kernel="linear", C=1000)),
    ("RBF SVM", SVC(gamma=2, C=1)),
    # Disabled: GaussianProcessClassifier(1.0 * RBF(1.0))
    ("Decision Tree", DecisionTreeClassifier(max_depth=5)),
    ("Random Forest", RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
    ("Neural Net", MLPClassifier(alpha=1, max_iter=1000)),
    ("AdaBoost", AdaBoostClassifier()),
    ("Naive Bayes", GaussianNB()),
]

names = [label for label, _ in _labelled_models]
classifiers = [model for _, model in _labelled_models]

assert len(names)==len(classifiers)

In [47]:
def prediction_classifier(name, clf):
    """Fit ``clf`` on the notebook's training split and evaluate it on the test split.

    ``name`` is accepted but not used. Reads the globals ``X_train``,
    ``y_train``, ``X_test`` and ``y_test``.

    Returns a three-item list: the value of ``clf.score(X_test, y_test)``,
    the elapsed seconds (fit + score + predict) rounded to one decimal, and
    the predictions for ``X_test``.
    """
    started = time()
    clf.fit(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    predictions = clf.predict(X_test)
    elapsed = round(time() - started, 1)
    return [test_score, elapsed, predictions]

In [48]:
# Train each model in turn and store, one row per model, its test score, the
# wall-clock seconds spent on fit + score + predict, its predictions and its
# classification report against the test labels.
results = pd.DataFrame(columns=['Score', 'Time', 'Results', 'Report'])
y_true = y_test
for name, clf in zip(names, classifiers):
    started = time()
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    y_pred = clf.predict(X_test)
    timer = time() - started
    report = classification_report(y_true, y_pred)
    results.loc[name] = [score, timer, y_pred, report]
    print(name, " done")

Nearest Neighbors  done


  'precision', 'predicted', average, warn_for)


Linear SVM  done
SVC high C  done
RBF SVM  done
Decision Tree  done
Random Forest  done


  'precision', 'predicted', average, warn_for)


Neural Net  done
AdaBoost  done
Naive Bayes  done


In [49]:
results.loc["TEST"]=[0,0,y_test,""]

In [50]:
results

Unnamed: 0,Score,Time,Results,Report
Nearest Neighbors,0.49683,0.162462,"[1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, ...",precision recall f1-score s...
Linear SVM,0.544374,0.42257,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",precision recall f1-score s...
SVC high C,0.544374,10.699907,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",precision recall f1-score s...
RBF SVM,0.540412,0.841616,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",precision recall f1-score s...
Decision Tree,0.534865,0.038604,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",precision recall f1-score s...
Random Forest,0.547544,0.038101,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",precision recall f1-score s...
Neural Net,0.544374,1.321774,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",precision recall f1-score s...
AdaBoost,0.507924,0.456692,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, ...",precision recall f1-score s...
Naive Bayes,0.535658,0.005014,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, ...",precision recall f1-score s...
TEST,0.0,0.0,"[1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, ...",


In [51]:
print(results.iat[0,3])

             precision    recall  f1-score   support

          0       0.54      0.51      0.53       687
          1       0.45      0.48      0.47       575

avg / total       0.50      0.50      0.50      1262



In [35]:
#SVC fine tuning

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# One sub-grid per kernel: RBF varies gamma, poly varies the degree, and all
# three vary the penalty C.
tuned_parameters = [
    dict(kernel=['rbf'], gamma=[1e-3, 1e-4], C=[1, 10, 100, 1000]),
    dict(kernel=['linear'], C=[1, 10, 100, 1000]),
    dict(kernel=['poly'], degree=[1, 2, 3, 5], C=[1, 10, 100, 1000]),
]

clf = GridSearchCV(estimator=SVC(), param_grid=tuned_parameters, verbose=1)
clf.fit(X_train, y_train)

print("Best parameters set found on development set:")
print(clf.best_params_)
print()
# Evaluate the search's predict() on the held-out test split.
y_true = y_test
y_pred = clf.predict(X_test)
print(classification_report(y_true, y_pred))
print()

Fitting 3 folds for each of 28 candidates, totalling 84 fits


[Parallel(n_jobs=1)]: Done  84 out of  84 | elapsed:  5.0min finished


Best parameters set found on development set:
{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}

             precision    recall  f1-score   support

          0       0.54      1.00      0.70       687
          1       0.00      0.00      0.00       290
          2       0.00      0.00      0.00       285

avg / total       0.30      0.54      0.38      1262




  'precision', 'predicted', average, warn_for)


In [38]:
prediction_classifier("", SVC(kernel="rbf", C=0.025, gamma=1e-8))[2]

AttributeError: 'numpy.ndarray' object has no attribute 'count'