In [1]:
import numpy as np


def load_sensor_data(accelerometer_file, gyroscope_file):
    accelerometer_data = np.genfromtxt(accelerometer_file, delimiter=',', usecols=(0, 3, 4, 5), dtype=('int', 'float', 'float', 'float'), converters={5: lambda x: float(x[:-1])})
    gyroscope_data = np.genfromtxt(gyroscope_file, delimiter=',', usecols=(0, 3, 4, 5), dtype=('int', 'float', 'float', 'float'), converters={5: lambda x: float(x[:-1])})
    return accelerometer_data, gyroscope_data


accelerometer_file = 'data_1604_accel_watch.txt'  
gyroscope_file = 'data_1604_gyro_watch.txt' 


accelerometer_data, gyroscope_data = load_sensor_data(accelerometer_file, gyroscope_file)

print("Accelerometer Data:")
print(accelerometer_data)

print("Gyroscope Data:")
print(gyroscope_data)


Accelerometer Data:
[(1604, 3.478326  , -2.3662195, -4.0174704 )
 (1604, 0.48557404, -4.9328036, -3.217807  )
 (1604, 1.4049475 , -1.2193968,  0.80684596) ...
 (1604, 7.902811  , -1.2193968,  8.597578  )
 (1604, 8.788666  , -4.1451116, 10.084377  )
 (1604, 7.3856635 , -4.1451116,  8.724471  )]
Gyroscope Data:
[(1604,  1.947164 , 0.9647269 , 0.675196  )
 (1604,  2.9410558, 1.5058812 , 0.2235239 )
 (1604,  2.4116194, 0.38415778, 0.6720002 ) ...
 (1604,  1.2757971, 0.53985095, 0.40140682)
 (1604,  0.1636611, 0.23412009, 0.45467004)
 (1604, -0.4648449, 0.926542  , 0.7220514 )]


In [2]:
import numpy as np

file_path = 'data_1604_accel_watch2.txt'


accelerometer_data = np.loadtxt(file_path, delimiter=',', usecols=(3, 4, 5), skiprows=0, dtype=float)
timestamps = np.loadtxt(file_path, delimiter=',', usecols=(2,), skiprows=0, dtype=np.int64)


accelerometer_data /= np.max(np.abs(accelerometer_data), axis=0)

mean = np.mean(accelerometer_data, axis=0)
std = np.std(accelerometer_data, axis=0)
normalized_accelerometer_data = (accelerometer_data - mean) / std


print("Normalized accelerometer data:")
print(normalized_accelerometer_data)
print("\nTimestamps:")
print(timestamps)


Normalized accelerometer data:
[[ 0.20288795  0.36686327 -1.4572432 ]
 [-0.22280364 -0.12234547 -1.308463  ]
 [-0.09203118  0.58545564 -0.55966212]
 ...
 [ 0.83223048  0.58545564  0.88983107]
 [ 0.95823525  0.02779409  1.16645527]
 [ 0.75867097  0.02779409  0.91343996]]

Timestamps:
[696459231597344 696459281097344 696459330597344 ... 696163101091414
 696163151015054 696163200938694]


In [4]:
import pandas as pd
import numpy as np
import glob
import os
def load_accelerometer_data(file_path):
    accelerometer_data = np.loadtxt(file_path, delimiter=',', usecols=(3, 4, 5), skiprows=0, dtype=float)
    df = pd.read_csv(file_path, delimiter=',', usecols=(1, 3, 4, 5), names=['Activity', 'X', 'Y', 'Z'])
    return df


def create_accelerometer_dataframe(folder_path):
    file_paths = glob.glob(os.path.join(folder_path, '*.txt'))  # Get a list of file paths in the folder
    data_frames = []
    for file_path in file_paths:
        df = load_accelerometer_data(file_path)
        data_frames.append(df)
    combined_df = pd.concat(data_frames, ignore_index=True)
    return combined_df


folder_path = 'wisdm dataset'  
accelerometer_dataframe = create_accelerometer_dataframe(folder_path)



In [5]:
print(accelerometer_dataframe)


        Activity         X          Y         Z
0              A  7.091625  -0.591667  8.195502
1              A  4.972757  -0.158317  6.696732
2              A  3.253720  -0.191835  6.107758
3              A  2.801216  -0.155922  5.997625
4              A  3.770868  -1.051354  7.731027
...          ...       ...        ...       ...
3616242        S  1.486500 -13.302334  5.355231
3616243        S  4.017171 -13.146711  5.118205
3616244        S  4.412214 -15.318253  3.370437
3616245        S  4.426579 -13.012636  2.575563
3616246        S  3.765780 -11.798776  2.166154

[3616247 rows x 4 columns]


# ACCELEROMETER DATA

# Shape descriptors XY



In [173]:

import pandas as pd


def extract_ferret_measure(data):
    x_values = []
    y_values = []

   
    for line in data:
       
        values = line.split(',')

      
        x_values.append(float(values[3]))
        y_values.append(float(values[4]))

    
    x_ferret = np.max(x_values) - np.min(x_values)
    y_ferret = np.max(y_values) - np.min(y_values)
    
    F = x_ferret/y_ferret

    return F

def extract_centroid_distance(data):
    x_values = []
    y_values = []

  
    for line in data:
        
        values = line.split(',')

        
        x_values.append(float(values[3]))
        y_values.append(float(values[4]))

    
    centroid_x = np.mean(x_values)
    centroid_y = np.mean(y_values)

    
    distances = np.sqrt((np.array(x_values) - centroid_x) ** 2 + (np.array(y_values) - centroid_y) ** 2)
    centroid_distance = np.mean(distances)

    return centroid_distance



def extract_polygon_area(data):
    x_values = []
    y_values = []

    
    for line in data:
        
        values = line.split(',')

        
        x_values.append(float(values[3]))
        y_values.append(float(values[4]))

    
    area = 0.5 * np.abs(np.dot(x_values, np.roll(y_values, 1)) - np.dot(y_values, np.roll(x_values, 1)))

    return area

def extract_polygon_perimeter(data):
    x_values = []
    y_values = []

   
    for line in data:
        
        values = line.split(',')

    
        x_values.append(float(values[3]))
        y_values.append(float(values[4]))

    
    dx = np.diff(x_values)
    dy = np.diff(y_values)
    perimeter = np.sum(np.sqrt(dx**2 + dy**2))

    return perimeter

def compute_shape_descriptors(data_frame):
    rows = []
    for i in range(0, len(data_frame), 200):
        subset = data_frame.iloc[i:i+200]

        x_signal = subset['X'].values
        y_signal = subset['Y'].values
        signal = np.column_stack((x_signal, y_signal))

        feret_measure = compute_feret_measure(signal)
        centroid_distance = compute_centroid_distance(signal)
        area = compute_area(signal)
        perimeter = compute_perimeter(signal)
        activity = subset['Activity'].iloc[0]

        rows.append([activity, feret_measure, centroid_distance, area, perimeter])

    result_df = pd.DataFrame(rows, columns=['Activity', 'Feret MeasureXY (SD1)', 'Centroid DistanceXY (SD2)', 'AreaXY (SD3)', 'PerimeterXY (SD4)'])
    return result_df


folder_path = 'wisdm dataset'  
accelerometer_dataframe = create_accelerometer_dataframe(folder_path)

shape_descriptor_df = compute_shape_descriptors(accelerometer_dataframe)


print(shape_descriptor_df)


      Activity  Feret MeasureXY (SD1)  Centroid DistanceXY (SD2)  \
0            A               3.446315                   4.166108   
1            A               3.607655                   3.769465   
2            A               2.474806                   3.269388   
3            A               2.812685                   3.473873   
4            A               2.091509                   3.107745   
...        ...                    ...                        ...   
18077        S              -6.633591                   5.770354   
18078        S              10.899521                   6.657011   
18079        S             -62.239627                   5.964674   
18080        S              17.274172                   3.856026   
18081        S              -5.055366                   7.254880   

       AreaXY (SD3)  PerimeterXY (SD4)  
0         72.882228         637.108259  
1        143.563950         708.345531  
2         79.493333         561.626765  
3        136.785846

In [164]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

activity_column = shape_descriptor_df['Activity']
feature_columns = shape_descriptor_df.drop('Activity', axis=1)


if np.any(np.isnan(feature_columns)) or np.any(np.isinf(feature_columns)):
    
    feature_columns = feature_columns.replace([np.inf, -np.inf], np.nan).fillna(0)

scaler = MinMaxScaler()
feature_columns_normalized = pd.DataFrame(scaler.fit_transform(feature_columns),
                                          columns=feature_columns.columns)

shape_descriptor_df_normalized = pd.concat([activity_column, feature_columns_normalized], axis=1)

print(shape_descriptor_df_normalized)

      Activity  Feret MeasureXY (SD1)  Centroid DistanceXY (SD2)  \
0            A               0.331873                   0.156731   
1            A               0.331875                   0.141765   
2            A               0.331863                   0.122897   
3            A               0.331867                   0.130613   
4            A               0.331859                   0.116798   
...        ...                    ...                        ...   
18077        S               0.331770                   0.217260   
18078        S               0.331949                   0.250715   
18079        S               0.331203                   0.224592   
18080        S               0.332014                   0.145032   
18081        S               0.331786                   0.273273   

       AreaXY (SD3)  PerimeterXY (SD4)  
0          0.292559           0.174989  
1          0.296488           0.194637  
2          0.292927           0.154170  
3          0.296111

# Shape descriptors YZ

In [162]:
import numpy as np
import pandas as pd


def extract_ferret_measure(data):
    y_values = []
    z_values = []

   
    for line in data:
       
        values = line.split(',')

      
        y_values.append(float(values[3]))
        z_values.append(float(values[4]))

    
    y_ferret = np.max(y_values) - np.min(y_values)
    z_ferret = np.max(z_values) - np.min(z_values)
    
    F = y_ferret/z_ferret

    return F

def extract_centroid_distance(data):
    y_values = []
    z_values = []

  
    for line in data:
        
        values = line.split(',')

        
        y_values.append(float(values[4]))
        z_values.append(float(values[5]))

    
    centroid_y = np.mean(y_values)
    centroid_z = np.mean(z_values)

    
    distances = np.sqrt((np.array(y_values) - centroid_y) ** 2 + (np.array(z_values) - centroid_z) ** 2)
    centroid_distance = np.mean(distances)

    return centroid_distance



def extract_polygon_area(data):
    y_values = []
    z_values = []

    
    for line in data:
        
        values = line.split(',')

        
        y_values.append(float(values[4]))
        z_values.append(float(values[5]))

    
    area = 0.5 * np.abs(np.dot(y_values, np.roll(z_values, 1)) - np.dot(x_values, np.roll(z_values, 1)))

    return area

def extract_polygon_perimeter(data):
    y_values = []
    z_values = []

   
    for line in data:
        
        values = line.split(',')

    
        y_values.append(float(values[4]))
        z_values.append(float(values[5]))

    
    dy = np.diff(y_values)
    dz = np.diff(z_values)
    perimeter = np.sum(np.sqrt(dy**2 + dz**2))

    return perimeter

def compute_shape_descriptors(data_frame):
    rows = []
    for i in range(0, len(data_frame), 200):
        subset = data_frame.iloc[i:i+200]

        y_signal = subset['Y'].values
        z_signal = subset['Z'].values
        signal = np.column_stack((y_signal, z_signal))

        feret_measure = compute_feret_measure(signal)
        centroid_distance = compute_centroid_distance(signal)
        area = compute_area(signal)
        perimeter = compute_perimeter(signal)
        activity = subset['Activity'].iloc[0]

        rows.append([activity, feret_measure, centroid_distance, area, perimeter])

    result_df = pd.DataFrame(rows, columns=['Activity', 'Feret MeasureYZ (SD1)', 'Centroid DistanceYZ (SD2)', 'AreaYZ (SD3)', 'PerimeterYZ (SD4)'])
    return result_df


folder_path = 'wisdm dataset'  
accelerometer_dataframe = create_accelerometer_dataframe(folder_path)

shape_descriptor_dfayz = compute_shape_descriptors(accelerometer_dataframe)


print(shape_descriptor_dfayz)

  feret_measure = (max_val - min_val) / (max_val + min_val)


      Activity  Feret MeasureYZ (SD1)  Centroid DistanceYZ (SD2)  \
0            A              36.486202                   3.078727   
1            A             -11.240089                   3.144069   
2            A             -12.493017                   2.439367   
3            A             -16.547802                   2.398105   
4            A             117.711032                   2.424349   
...        ...                    ...                        ...   
18077        S            -138.231664                   6.051039   
18078        S               7.133741                   5.828642   
18079        S              11.715436                   6.416312   
18080        S               5.361884                   5.197976   
18081        S              48.098740                   5.468687   

       AreaYZ (SD3)  PerimeterYZ (SD4)  
0         46.964787         523.435849  
1          3.325218         581.780493  
2          5.887382         444.327755  
3        -50.708939

In [165]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

activity_column = shape_descriptor_dfayz['Activity']
feature_columns = shape_descriptor_dfayz.drop('Activity', axis=1)


if np.any(np.isnan(feature_columns)) or np.any(np.isinf(feature_columns)):
    
    feature_columns = feature_columns.replace([np.inf, -np.inf], np.nan).fillna(0)

scaler = MinMaxScaler()
feature_columns_normalized = pd.DataFrame(scaler.fit_transform(feature_columns),
                                          columns=feature_columns.columns)

shape_descriptor_dfayz_normalized = pd.concat([activity_column, feature_columns_normalized], axis=1)

print(shape_descriptor_dfayz_normalized)


      Activity  Feret MeasureYZ (SD1)  Centroid DistanceYZ (SD2)  \
0            A               0.417199                   0.103172   
1            A               0.416655                   0.105370   
2            A               0.416641                   0.081660   
3            A               0.416594                   0.080272   
4            A               0.418125                   0.081155   
...        ...                    ...                        ...   
18077        S               0.415207                   0.203177   
18078        S               0.416864                   0.195694   
18079        S               0.416917                   0.215467   
18080        S               0.416844                   0.174475   
18081        S               0.417331                   0.183583   

       AreaYZ (SD3)  PerimeterYZ (SD4)  
0          0.296410           0.105986  
1          0.292901           0.117860  
2          0.293107           0.089887  
3          0.288556

# Shape descriptors ZX

In [163]:
import numpy as np
import pandas as pd


def extract_ferret_measure(data):
    z_values = []
    x_values = []

   
    for line in data:
       
        values = line.split(',')

      
        z_values.append(float(values[5]))
        x_values.append(float(values[3]))

    
    z_ferret = np.max(z_values) - np.min(z_values)
    x_ferret = np.max(x_values) - np.min(x_values)
    
    F = z_ferret/x_ferret

    return F

def extract_centroid_distance(data):
    z_values = []
    x_values = []

  
    for line in data:
        
        values = line.split(',')

        
        z_values.append(float(values[5]))
        x_values.append(float(values[3]))

    
    centroid_z = np.mean(z_values)
    centroid_x = np.mean(x_values)

    
    distances = np.sqrt((np.array(z_values) - centroid_z) ** 2 + (np.array(x_values) - centroid_x) ** 2)
    centroid_distance = np.mean(distances)

    return centroid_distance



def extract_polygon_area(data):
    z_values = []
    x_values = []

    
    for line in data:
        
        values = line.split(',')

        
        z_values.append(float(values[5]))
        x_values.append(float(values[3]))

    
    area = 0.5 * np.abs(np.dot(z_values, np.roll(x_values, 1)) - np.dot(y_values, np.roll(x_values, 1)))

    return area

def extract_polygon_perimeter(data):
    z_values = []
    x_values = []

   
    for line in data:
        
        values = line.split(',')

    
        z_values.append(float(values[5]))
        x_values.append(float(values[3]))

    
    dz = np.diff(z_values)
    dx = np.diff(x_values)
    perimeter = np.sum(np.sqrt(dz**2 + dx**2))

    return perimeter

def compute_shape_descriptors(data_frame):
    rows = []
    for i in range(0, len(data_frame), 200):
        subset = data_frame.iloc[i:i+200]

        z_signal = subset['Z'].values
        x_signal = subset['X'].values
        signal = np.column_stack((z_signal, x_signal))

        feret_measure = compute_feret_measure(signal)
        centroid_distance = compute_centroid_distance(signal)
        area = compute_area(signal)
        perimeter = compute_perimeter(signal)
        activity = subset['Activity'].iloc[0]

        rows.append([activity, feret_measure, centroid_distance, area, perimeter])

    result_df = pd.DataFrame(rows, columns=['Activity', 'Feret MeasureZX (SD1)', 'Centroid DistanceZX (SD2)', 'AreaZX (SD3)', 'PerimeterZX (SD4)'])
    return result_df


folder_path = 'wisdm dataset'  
accelerometer_dataframe = create_accelerometer_dataframe(folder_path)

shape_descriptor_dfazx = compute_shape_descriptors(accelerometer_dataframe)


print(shape_descriptor_dfazx)

  feret_measure = (max_val - min_val) / (max_val + min_val)


      Activity  Feret MeasureZX (SD1)  Centroid DistanceZX (SD2)  \
0            A               1.890569                   4.133102   
1            A               2.697244                   3.294880   
2            A               2.134249                   2.995400   
3            A               1.686849                   3.260269   
4            A               1.584218                   3.060871   
...        ...                    ...                        ...   
18077        S               2.717448                   6.204857   
18078        S               3.437473                   6.891487   
18079        S               3.474245                   6.545389   
18080        S               3.033379                   4.918315   
18081        S               3.606777                   5.893984   

       AreaZX (SD3)  PerimeterZX (SD4)  
0        130.441286         535.609822  
1         43.070604         609.639404  
2         -8.869300         494.237553  
3         82.818786

In [167]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

activity_column = shape_descriptor_dfazx['Activity']
feature_columns = shape_descriptor_dfazx.drop('Activity', axis=1)


if np.any(np.isnan(feature_columns)) or np.any(np.isinf(feature_columns)):
    
    feature_columns = feature_columns.replace([np.inf, -np.inf], np.nan).fillna(0)

scaler = MinMaxScaler()
feature_columns_normalized = pd.DataFrame(scaler.fit_transform(feature_columns),
                                          columns=feature_columns.columns)

shape_descriptor_dfazx_normalized = pd.concat([activity_column, feature_columns_normalized], axis=1)

print(shape_descriptor_dfazx_normalized)

      Activity  Feret MeasureZX (SD1)  Centroid DistanceZX (SD2)  \
0            A               0.363960                   0.173303   
1            A               0.363968                   0.138056   
2            A               0.363962                   0.125463   
3            A               0.363958                   0.136600   
4            A               0.363957                   0.128216   
...        ...                    ...                        ...   
18077        S               0.363968                   0.260420   
18078        S               0.363975                   0.289293   
18079        S               0.363975                   0.274740   
18080        S               0.363971                   0.206321   
18081        S               0.363977                   0.247348   

       AreaZX (SD3)  PerimeterZX (SD4)  
0          0.708726           0.124031  
1          0.699299           0.141262  
2          0.693695           0.114401  
3          0.703588

# COMBINED ACC FEATURES 


In [170]:
# Concatenate the three dataframes
frames = [shape_descriptor_df_normalized, shape_descriptor_dfayz_normalized, shape_descriptor_dfazx_normalized]
accelerometer_norm_all = pd.concat(frames, axis=1)

# Print the resulting dataframe
print(accelerometer_norm_all)

      Activity  Feret MeasureXY (SD1)  Centroid DistanceXY (SD2)  \
0            A               0.331873                   0.156731   
1            A               0.331875                   0.141765   
2            A               0.331863                   0.122897   
3            A               0.331867                   0.130613   
4            A               0.331859                   0.116798   
...        ...                    ...                        ...   
18077        S               0.331770                   0.217260   
18078        S               0.331949                   0.250715   
18079        S               0.331203                   0.224592   
18080        S               0.332014                   0.145032   
18081        S               0.331786                   0.273273   

       AreaXY (SD3)  PerimeterXY (SD4) Activity  Feret MeasureYZ (SD1)  \
0          0.292559           0.174989        A               0.417199   
1          0.296488           0.194

In [171]:
accelerometer_norm_all = accelerometer_norm_all.loc[:, ~accelerometer_norm_all.columns.duplicated()]

In [172]:
print(accelerometer_norm_all)

      Activity  Feret MeasureXY (SD1)  Centroid DistanceXY (SD2)  \
0            A               0.331873                   0.156731   
1            A               0.331875                   0.141765   
2            A               0.331863                   0.122897   
3            A               0.331867                   0.130613   
4            A               0.331859                   0.116798   
...        ...                    ...                        ...   
18077        S               0.331770                   0.217260   
18078        S               0.331949                   0.250715   
18079        S               0.331203                   0.224592   
18080        S               0.332014                   0.145032   
18081        S               0.331786                   0.273273   

       AreaXY (SD3)  PerimeterXY (SD4)  Feret MeasureYZ (SD1)  \
0          0.292559           0.174989               0.417199   
1          0.296488           0.194637               

# GYROSCOPE DATA

# GYRO shape descriptors features XY

In [175]:
import pandas as pd


def extract_ferret_measure(data):
    x_values = []
    y_values = []

   
    for line in data:
       
        values = line.split(',')

      
        x_values.append(float(values[3]))
        y_values.append(float(values[4]))

    
    x_ferret = np.max(x_values) - np.min(x_values)
    y_ferret = np.max(y_values) - np.min(y_values)
    
    F = x_ferret/y_ferret

    return F

def extract_centroid_distance(data):
    x_values = []
    y_values = []

  
    for line in data:
        
        values = line.split(',')

        
        x_values.append(float(values[3]))
        y_values.append(float(values[4]))

    
    centroid_x = np.mean(x_values)
    centroid_y = np.mean(y_values)

    
    distances = np.sqrt((np.array(x_values) - centroid_x) ** 2 + (np.array(y_values) - centroid_y) ** 2)
    centroid_distance = np.mean(distances)

    return centroid_distance



def extract_polygon_area(data):
    x_values = []
    y_values = []

    
    for line in data:
        
        values = line.split(',')

        
        x_values.append(float(values[3]))
        y_values.append(float(values[4]))

    
    area = 0.5 * np.abs(np.dot(x_values, np.roll(y_values, 1)) - np.dot(y_values, np.roll(x_values, 1)))

    return area

def extract_polygon_perimeter(data):
    x_values = []
    y_values = []

   
    for line in data:
        
        values = line.split(',')

    
        x_values.append(float(values[3]))
        y_values.append(float(values[4]))

    
    dx = np.diff(x_values)
    dy = np.diff(y_values)
    perimeter = np.sum(np.sqrt(dx**2 + dy**2))

    return perimeter

def compute_shape_descriptors(data_frame):
    rows = []
    for i in range(0, len(data_frame), 200):
        subset = data_frame.iloc[i:i+200]

        x_signal = subset['X'].values
        y_signal = subset['Y'].values
        signal = np.column_stack((x_signal, y_signal))

        feret_measure = compute_feret_measure(signal)
        centroid_distance = compute_centroid_distance(signal)
        area = compute_area(signal)
        perimeter = compute_perimeter(signal)
        activity = subset['Activity'].iloc[0]

        rows.append([activity, feret_measure, centroid_distance, area, perimeter])

    result_df = pd.DataFrame(rows, columns=['Activity', 'Feret MeasureXY(gyro) (SD1)', 'Centroid DistanceXY(gyro) (SD2)', 'AreaXY(gyro) (SD3)', 'PerimeterXY(gyro) (SD4)'])
    return result_df


folder_path = 'wisdm gyro data'  
accelerometer_dataframe = create_accelerometer_dataframe(folder_path)

shape_descriptor_dfgyroxy = compute_shape_descriptors(accelerometer_dataframe)


print(shape_descriptor_dfgyroxy)


  feret_measure = (max_val - min_val) / (max_val + min_val)


      Activity  Feret MeasureXY(gyro) (SD1)  Centroid DistanceXY(gyro) (SD2)  \
0            A                    13.092213                         2.029006   
1            A                    17.916036                         2.278567   
2            A                     3.898563                         1.909300   
3            A                    17.267186                         2.100625   
4            A                    21.665524                         1.948994   
...        ...                          ...                              ...   
17197        S                   -23.124747                         2.341749   
17198        S                     8.459088                         2.505906   
17199        S                   -48.085934                         2.279203   
17200        S                     4.736936                         2.045305   
17201        S                    13.754630                         2.393159   

       AreaXY(gyro) (SD3)  PerimeterXY(

In [176]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

activity_column = shape_descriptor_dfgyroxy['Activity']
feature_columns = shape_descriptor_dfgyroxy.drop('Activity', axis=1)


if np.any(np.isnan(feature_columns)) or np.any(np.isinf(feature_columns)):
    
    feature_columns = feature_columns.replace([np.inf, -np.inf], np.nan).fillna(0)

scaler = MinMaxScaler()
feature_columns_normalized = pd.DataFrame(scaler.fit_transform(feature_columns),
                                          columns=feature_columns.columns)

shape_descriptor_dfgyroxy_normalized = pd.concat([activity_column, feature_columns_normalized], axis=1)

print(shape_descriptor_dfgyroxy_normalized)

      Activity  Feret MeasureXY(gyro) (SD1)  Centroid DistanceXY(gyro) (SD2)  \
0            A                     0.517489                         0.275261   
1            A                     0.517530                         0.309146   
2            A                     0.517411                         0.259007   
3            A                     0.517524                         0.284985   
4            A                     0.517561                         0.264397   
...        ...                          ...                              ...   
17197        S                     0.517181                         0.317725   
17198        S                     0.517449                         0.340014   
17199        S                     0.516969                         0.309232   
17200        S                     0.517418                         0.277474   
17201        S                     0.517494                         0.324705   

       AreaXY(gyro) (SD3)  PerimeterXY(

# Shape descriptor features YZ

In [178]:
import numpy as np
import pandas as pd


def extract_ferret_measure(data):
    y_values = []
    z_values = []

   
    for line in data:
       
        values = line.split(',')

      
        y_values.append(float(values[3]))
        z_values.append(float(values[4]))

    
    y_ferret = np.max(y_values) - np.min(y_values)
    z_ferret = np.max(z_values) - np.min(z_values)
    
    F = y_ferret/z_ferret

    return F

def extract_centroid_distance(data):
    y_values = []
    z_values = []

  
    for line in data:
        
        values = line.split(',')

        
        y_values.append(float(values[4]))
        z_values.append(float(values[5]))

    
    centroid_y = np.mean(y_values)
    centroid_z = np.mean(z_values)

    
    distances = np.sqrt((np.array(y_values) - centroid_y) ** 2 + (np.array(z_values) - centroid_z) ** 2)
    centroid_distance = np.mean(distances)

    return centroid_distance



def extract_polygon_area(data):
    y_values = []
    z_values = []

    
    for line in data:
        
        values = line.split(',')

        
        y_values.append(float(values[4]))
        z_values.append(float(values[5]))

    
    area = 0.5 * np.abs(np.dot(y_values, np.roll(z_values, 1)) - np.dot(x_values, np.roll(z_values, 1)))

    return area

def extract_polygon_perimeter(data):
    y_values = []
    z_values = []

   
    for line in data:
        
        values = line.split(',')

    
        y_values.append(float(values[4]))
        z_values.append(float(values[5]))

    
    dy = np.diff(y_values)
    dz = np.diff(z_values)
    perimeter = np.sum(np.sqrt(dy**2 + dz**2))

    return perimeter

def compute_shape_descriptors(data_frame):
    rows = []
    for i in range(0, len(data_frame), 200):
        subset = data_frame.iloc[i:i+200]

        y_signal = subset['Y'].values
        z_signal = subset['Z'].values
        signal = np.column_stack((y_signal, z_signal))

        feret_measure = compute_feret_measure(signal)
        centroid_distance = compute_centroid_distance(signal)
        area = compute_area(signal)
        perimeter = compute_perimeter(signal)
        activity = subset['Activity'].iloc[0]

        rows.append([activity, feret_measure, centroid_distance, area, perimeter])

    result_df = pd.DataFrame(rows, columns=['Activity', 'Feret MeasureYZ(gyro) (SD1)', 'Centroid DistanceYZ(gyro) (SD2)', 'AreaYZ(gyro) (SD3)', 'PerimeterYZ(gyro) (SD4)'])
    return result_df


folder_path = 'wisdm gyro data'  
accelerometer_dataframe = create_accelerometer_dataframe(folder_path)

shape_descriptor_dfayzgyro = compute_shape_descriptors(accelerometer_dataframe)


print(shape_descriptor_dfayzgyro)

  feret_measure = (max_val - min_val) / (max_val + min_val)


      Activity  Feret MeasureYZ(gyro) (SD1)  Centroid DistanceYZ(gyro) (SD2)  \
0            A                     6.552911                         3.636689   
1            A                   -50.409912                         3.363248   
2            A                     6.611834                         3.356353   
3            A                     8.059948                         3.295561   
4            A                    14.420221                         3.231853   
...        ...                          ...                              ...   
17197        S                     5.191542                         1.804195   
17198        S                    -3.475012                         1.660166   
17199        S                    -4.248348                         1.689113   
17200        S                    -5.789590                         1.445124   
17201        S                    -4.341001                         1.703736   

       AreaYZ(gyro) (SD3)  PerimeterYZ(

In [179]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

activity_column = shape_descriptor_dfayzgyro['Activity']
feature_columns = shape_descriptor_dfayzgyro.drop('Activity', axis=1)


if np.any(np.isnan(feature_columns)) or np.any(np.isinf(feature_columns)):
    
    feature_columns = feature_columns.replace([np.inf, -np.inf], np.nan).fillna(0)

scaler = MinMaxScaler()
feature_columns_normalized = pd.DataFrame(scaler.fit_transform(feature_columns),
                                          columns=feature_columns.columns)

shape_descriptor_dfayzgyro_normalized = pd.concat([activity_column, feature_columns_normalized], axis=1)

print(shape_descriptor_dfayzgyro_normalized)


      Activity  Feret MeasureYZ(gyro) (SD1)  Centroid DistanceYZ(gyro) (SD2)  \
0            A                     0.664543                         0.501606   
1            A                     0.664203                         0.463871   
2            A                     0.664543                         0.462919   
3            A                     0.664551                         0.454530   
4            A                     0.664589                         0.445738   
...        ...                          ...                              ...   
17197        S                     0.664534                         0.248718   
17198        S                     0.664483                         0.228842   
17199        S                     0.664478                         0.232836   
17200        S                     0.664469                         0.199165   
17201        S                     0.664478                         0.234854   

       AreaYZ(gyro) (SD3)  PerimeterYZ(

# Shape descriptor features ZX

In [180]:
import numpy as np
import pandas as pd


def extract_ferret_measure(data):
    z_values = []
    x_values = []

   
    for line in data:
       
        values = line.split(',')

      
        z_values.append(float(values[5]))
        x_values.append(float(values[3]))

    
    z_ferret = np.max(z_values) - np.min(z_values)
    x_ferret = np.max(x_values) - np.min(x_values)
    
    F = z_ferret/x_ferret

    return F

def extract_centroid_distance(data):
    z_values = []
    x_values = []

  
    for line in data:
        
        values = line.split(',')

        
        z_values.append(float(values[5]))
        x_values.append(float(values[3]))

    
    centroid_z = np.mean(z_values)
    centroid_x = np.mean(x_values)

    
    distances = np.sqrt((np.array(z_values) - centroid_z) ** 2 + (np.array(x_values) - centroid_x) ** 2)
    centroid_distance = np.mean(distances)

    return centroid_distance



def extract_polygon_area(data):
    z_values = []
    x_values = []

    
    for line in data:
        
        values = line.split(',')

        
        z_values.append(float(values[5]))
        x_values.append(float(values[3]))

    
    area = 0.5 * np.abs(np.dot(z_values, np.roll(x_values, 1)) - np.dot(y_values, np.roll(x_values, 1)))

    return area

def extract_polygon_perimeter(data):
    z_values = []
    x_values = []

   
    for line in data:
        
        values = line.split(',')

    
        z_values.append(float(values[5]))
        x_values.append(float(values[3]))

    
    dz = np.diff(z_values)
    dx = np.diff(x_values)
    perimeter = np.sum(np.sqrt(dz**2 + dx**2))

    return perimeter

def compute_shape_descriptors(data_frame):
    rows = []
    for i in range(0, len(data_frame), 200):
        subset = data_frame.iloc[i:i+200]

        z_signal = subset['Z'].values
        x_signal = subset['X'].values
        signal = np.column_stack((z_signal, x_signal))

        feret_measure = compute_feret_measure(signal)
        centroid_distance = compute_centroid_distance(signal)
        area = compute_area(signal)
        perimeter = compute_perimeter(signal)
        activity = subset['Activity'].iloc[0]

        rows.append([activity, feret_measure, centroid_distance, area, perimeter])

    result_df = pd.DataFrame(rows, columns=['Activity', 'Feret MeasureZX(gyro) (SD1)', 'Centroid DistanceZX(gyro) (SD2)', 'AreaZX(gyro) (SD3)', 'PerimeterZX(gyro) (SD4)'])
    return result_df


folder_path = 'wisdm gyro data'  
accelerometer_dataframe = create_accelerometer_dataframe(folder_path)

shape_descriptor_dfazxgyro = compute_shape_descriptors(accelerometer_dataframe)


print(shape_descriptor_dfazxgyro)

  feret_measure = (max_val - min_val) / (max_val + min_val)


      Activity  Feret MeasureZX(gyro) (SD1)  Centroid DistanceZX(gyro) (SD2)  \
0            A                     6.552911                         3.748797   
1            A                    17.916036                         3.727840   
2            A                     6.611834                         3.549859   
3            A                     8.059948                         3.465400   
4            A                    14.420221                         3.175047   
...        ...                          ...                              ...   
17197        S                   -23.124747                         2.521102   
17198        S                     8.459088                         2.590616   
17199        S                   -48.085934                         2.177699   
17200        S                     4.736936                         2.002497   
17201        S                    13.754630                         2.184178   

       AreaZX(gyro) (SD3)  PerimeterZX(

In [181]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

activity_column = shape_descriptor_dfazxgyro['Activity']
feature_columns = shape_descriptor_dfazxgyro.drop('Activity', axis=1)


if np.any(np.isnan(feature_columns)) or np.any(np.isinf(feature_columns)):
    
    feature_columns = feature_columns.replace([np.inf, -np.inf], np.nan).fillna(0)

scaler = MinMaxScaler()
feature_columns_normalized = pd.DataFrame(scaler.fit_transform(feature_columns),
                                          columns=feature_columns.columns)

shape_descriptor_dfazxgyro_normalized = pd.concat([activity_column, feature_columns_normalized], axis=1)

print(shape_descriptor_dfazxgyro_normalized)

      Activity  Feret MeasureZX(gyro) (SD1)  Centroid DistanceZX(gyro) (SD2)  \
0            A                     0.517433                         0.561968   
1            A                     0.517530                         0.558825   
2            A                     0.517434                         0.532135   
3            A                     0.517446                         0.519469   
4            A                     0.517500                         0.475926   
...        ...                          ...                              ...   
17197        S                     0.517181                         0.377859   
17198        S                     0.517449                         0.388283   
17199        S                     0.516969                         0.326361   
17200        S                     0.517418                         0.300087   
17201        S                     0.517494                         0.327332   

       AreaZX(gyro) (SD3)  PerimeterZX(

In [183]:
# Concatenate the three dataframes
frames = [shape_descriptor_dfgyroxy_normalized, shape_descriptor_dfayzgyro_normalized, shape_descriptor_dfazxgyro_normalized]
gyroscope_norm_all = pd.concat(frames, axis=1)

# Print the resulting dataframe
print(gyroscope_norm_all)

      Activity  Feret MeasureXY(gyro) (SD1)  Centroid DistanceXY(gyro) (SD2)  \
0            A                     0.517489                         0.275261   
1            A                     0.517530                         0.309146   
2            A                     0.517411                         0.259007   
3            A                     0.517524                         0.284985   
4            A                     0.517561                         0.264397   
...        ...                          ...                              ...   
17197        S                     0.517181                         0.317725   
17198        S                     0.517449                         0.340014   
17199        S                     0.516969                         0.309232   
17200        S                     0.517418                         0.277474   
17201        S                     0.517494                         0.324705   

       AreaXY(gyro) (SD3)  PerimeterXY(

In [185]:
gyroscope_norm_all = gyroscope_norm_all.loc[:, ~gyroscope_norm_all.columns.duplicated()]

In [186]:
print(gyroscope_norm_all)

      Activity  Feret MeasureXY(gyro) (SD1)  Centroid DistanceXY(gyro) (SD2)  \
0            A                     0.517489                         0.275261   
1            A                     0.517530                         0.309146   
2            A                     0.517411                         0.259007   
3            A                     0.517524                         0.284985   
4            A                     0.517561                         0.264397   
...        ...                          ...                              ...   
17197        S                     0.517181                         0.317725   
17198        S                     0.517449                         0.340014   
17199        S                     0.516969                         0.309232   
17200        S                     0.517418                         0.277474   
17201        S                     0.517494                         0.324705   

       AreaXY(gyro) (SD3)  PerimeterXY(

# ALL FEATURES


In [187]:
# Concatenate the three dataframes
frames = [accelerometer_norm_all,gyroscope_norm_all]
All_features = pd.concat(frames, axis=1)

# Print the resulting dataframe
print(All_features)

      Activity  Feret MeasureXY (SD1)  Centroid DistanceXY (SD2)  \
0            A               0.331873                   0.156731   
1            A               0.331875                   0.141765   
2            A               0.331863                   0.122897   
3            A               0.331867                   0.130613   
4            A               0.331859                   0.116798   
...        ...                    ...                        ...   
18077        S               0.331770                   0.217260   
18078        S               0.331949                   0.250715   
18079        S               0.331203                   0.224592   
18080        S               0.332014                   0.145032   
18081        S               0.331786                   0.273273   

       AreaXY (SD3)  PerimeterXY (SD4)  Feret MeasureYZ (SD1)  \
0          0.292559           0.174989               0.417199   
1          0.296488           0.194637               

In [188]:
All_features = All_features.loc[:, ~All_features.columns.duplicated()]

In [189]:
print(All_features)

      Activity  Feret MeasureXY (SD1)  Centroid DistanceXY (SD2)  \
0            A               0.331873                   0.156731   
1            A               0.331875                   0.141765   
2            A               0.331863                   0.122897   
3            A               0.331867                   0.130613   
4            A               0.331859                   0.116798   
...        ...                    ...                        ...   
18077        S               0.331770                   0.217260   
18078        S               0.331949                   0.250715   
18079        S               0.331203                   0.224592   
18080        S               0.332014                   0.145032   
18081        S               0.331786                   0.273273   

       AreaXY (SD3)  PerimeterXY (SD4)  Feret MeasureYZ (SD1)  \
0          0.292559           0.174989               0.417199   
1          0.296488           0.194637               

In [190]:
All_features.dropna(inplace=True)


In [191]:
print(All_features)

      Activity  Feret MeasureXY (SD1)  Centroid DistanceXY (SD2)  \
0            A               0.331873                   0.156731   
1            A               0.331875                   0.141765   
2            A               0.331863                   0.122897   
3            A               0.331867                   0.130613   
4            A               0.331859                   0.116798   
...        ...                    ...                        ...   
17197        F               0.331819                   0.024660   
17198        F               0.331824                   0.023760   
17199        F               0.331821                   0.021745   
17200        F               0.331785                   0.023379   
17201        F               0.331877                   0.034613   

       AreaXY (SD3)  PerimeterXY (SD4)  Feret MeasureYZ (SD1)  \
0          0.292559           0.174989               0.417199   
1          0.296488           0.194637               

In [203]:
from sklearn.model_selection import train_test_split


X = All_features.drop('Activity', axis=1)  
y = All_features['Activity'] 

X_trainf, X_testf, y_trainf, y_testf = train_test_split(X, y, test_size=0.25, random_state=42)

In [204]:
X_trainf

Unnamed: 0,Feret MeasureXY (SD1),Centroid DistanceXY (SD2),AreaXY (SD3),PerimeterXY (SD4),Feret MeasureYZ (SD1),Centroid DistanceYZ (SD2),AreaYZ (SD3),PerimeterYZ (SD4),Feret MeasureZX (SD1),Centroid DistanceZX (SD2),...,AreaXY(gyro) (SD3),PerimeterXY(gyro) (SD4),Feret MeasureYZ(gyro) (SD1),Centroid DistanceYZ(gyro) (SD2),AreaYZ(gyro) (SD3),PerimeterYZ(gyro) (SD4),Feret MeasureZX(gyro) (SD1),Centroid DistanceZX(gyro) (SD2),AreaZX(gyro) (SD3),PerimeterZX(gyro) (SD4)
9527,0.331851,0.012797,0.288427,0.027482,0.416795,0.015228,0.292944,0.024726,0.363966,0.016971,...,0.487611,0.222101,0.664536,0.082122,0.436466,0.115610,0.517414,0.125459,0.558533,0.244357
7599,0.332307,0.029578,0.288227,0.022289,0.416825,0.019439,0.290864,0.016215,0.363951,0.029027,...,0.498617,0.001238,0.664476,0.000439,0.433232,0.000711,0.517412,0.000855,0.557596,0.001380
6211,0.331874,0.118488,0.298667,0.128147,0.416755,0.082311,0.296388,0.071757,0.363958,0.096543,...,0.513489,0.283739,0.664540,0.402159,0.454703,0.285645,0.517306,0.378505,0.564027,0.333675
10393,0.331831,0.047354,0.287994,0.024025,0.416754,0.059466,0.291166,0.025136,0.363867,0.067573,...,0.498589,0.006737,0.664325,0.012477,0.433234,0.004571,0.517361,0.019477,0.557571,0.008339
2459,0.331761,0.141109,0.289415,0.040749,0.416748,0.135021,0.295489,0.034622,0.364320,0.154335,...,0.500418,0.063626,0.664578,0.081282,0.432095,0.037126,0.519391,0.155224,0.558454,0.079735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11284,0.331813,0.036866,0.288273,0.017886,0.416920,0.029888,0.292937,0.014881,0.363951,0.036623,...,0.498609,0.059824,0.664294,0.045774,0.432983,0.043552,0.517315,0.060139,0.556877,0.067907
11964,0.331830,0.101595,0.286377,0.034010,0.416762,0.073718,0.292792,0.024357,0.363919,0.111169,...,0.498618,0.008656,0.666895,0.003015,0.433232,0.002960,0.517360,0.006889,0.557604,0.010843
5390,0.331826,0.061144,0.288335,0.036832,0.416660,0.047957,0.293213,0.027792,0.364030,0.062319,...,0.498614,0.031172,0.664465,0.019157,0.433210,0.014851,0.518384,0.043633,0.557495,0.036596
860,0.331827,0.145009,0.286980,0.025116,0.416759,0.098060,0.292660,0.019410,0.363915,0.141180,...,0.498483,0.020838,0.664484,0.047124,0.433212,0.011922,0.517327,0.063252,0.556746,0.028425


In [205]:
y_trainf

9527     Q
7599     D
6211     A
10393    J
2459     J
        ..
11284    F
11964    H
5390     I
860      L
15795    C
Name: Activity, Length: 12901, dtype: object

# KNN

In [207]:
from sklearn.neighbors import KNeighborsClassifier


knn = KNeighborsClassifier(n_neighbors=5)


knn.fit(X_trainf, y_trainf)


y_pred1 = knn.predict(X_testf)


accuracy = knn.score(X_testf, y_testf)


print("Accuracy:", accuracy)

Accuracy: 0.6561264822134387


# SVM

In [None]:
# from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

svm_classifier = SVC()

param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf'], 'gamma': [0.1, 1, 10]}

grid_search = GridSearchCV(estimator=svm_classifier, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_trainf, y_trainf)

best_svm_classifier = grid_search.best_estimator_

best_svm_classifier.fit(X_trainf, y_trainf)

y_predf = best_svm_classifier.predict(X_testf)

In [213]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_testf, y_predf)
print("Accuracy:", accuracy)

Accuracy: 0.6614740757963264


In [211]:
from sklearn.metrics import classification_report

report = classification_report(y_testf, y_predf)
print("Classification Report:")
print(report)


Classification Report:
              precision    recall  f1-score   support

           A       0.82      0.75      0.78       225
           B       0.95      0.93      0.94       229
           C       0.74      0.73      0.74       226
           D       0.37      0.55      0.44       266
           E       0.36      0.47      0.41       255
           F       0.67      0.73      0.70       222
           G       0.79      0.68      0.73       250
           H       0.51      0.62      0.56       222
           I       0.47      0.44      0.46       234
           J       0.58      0.51      0.54       257
           K       0.58      0.55      0.56       242
           L       0.39      0.28      0.33       214
           M       0.74      0.79      0.76       236
           O       0.89      0.81      0.85       240
           P       0.85      0.82      0.84       227
           Q       0.79      0.58      0.67       269
           R       0.91      0.83      0.87       242
    

In [214]:
from sklearn.metrics import confusion_matrix

# Assuming you have the true labels y_test and the predicted labels y_pred

# Generate the confusion matrix
cm = confusion_matrix(y_testf, y_predf)

# Print the confusion matrix
print("Confusion Matrix:")
print(cm)


Confusion Matrix:
[[168   8  21   0   0   0   3   0   0   0   0   0  13   1   1   0   0  10]
 [  0 214   5   0   0   0   0   0   0   0   0   0   3   6   0   0   0   1]
 [ 12   1 166   1   0   0   1   1   0   0   2   2  25   1   4   0   0  10]
 [  1   0   0 145  56   5   4  10  11   6  14   6   1   0   1   3   3   0]
 [  0   0   1  90 120   4   0   6   3  11   7  10   0   0   0   2   0   1]
 [  1   0   0   9  14 163  10   2   1   0   0   2   1   2   0  15   2   0]
 [  0   0   3  27   5  11 170   3   2   2   4   2   5   0   2   5   6   3]
 [  0   0   0   5  14   5   0 138  21  17   6  14   0   0   0   2   0   0]
 [  0   0   0  13  12   1   1  40 104  29  16  13   0   0   1   2   0   2]
 [  0   0   0  15  15   1   0  21  38 131   8  14   0   0   0  10   1   3]
 [  0   0   0  32  12   4   0  20  13   4 132  21   0   0   1   2   0   1]
 [  0   0   4  17  34   0   1  23  23  21  26  60   2   0   0   0   1   2]
 [ 12   1  16   0   3   1   1   0   1   0   0   1 187   8   4   0   0   1]
 [  3  

# KNN

In [275]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import confusion_matrix

knn_classifier = KNeighborsClassifier()
my_param_grid = {'n_neighbors': [5, 10, 20], 'leaf_size': [20, 30, 40]}
my_cv = KFold(n_splits=10, shuffle=True)  

knn_model_gs = GridSearchCV(estimator=knn_classifier, 
                            param_grid=my_param_grid,
                            cv=my_cv,
                            scoring='accuracy')

knn_model_gs.fit(X_trainf, y_trainf)

best_knn_model = knn_model_gs.best_estimator_

best_knn_model.fit(X_trainf, y_trainf)

y_predkf = best_knn_model.predict(X_testf)


cm = confusion_matrix(y_testf, y_predkf)


print("Confusion Matrix:")
print(cm)


Confusion Matrix:
[[165   7  17   0   0   0   3   0   0   0   0   1  16   1   1   0   1  13]
 [  1 215   1   0   0   0   0   0   0   0   0   0   3   2   2   0   0   5]
 [ 25   0 152   1   0   0   2   0   3   2   2   1  17   1   0   0   1  19]
 [  1   0   4 137  51   7   4   9   9  16   9   7   3   1   2   0   3   3]
 [  2   0   1  41 142   6   2   5  11  10  15  11   1   1   0   2   3   2]
 [  4   0   2   6   9 155   9   2   3   0   0   4   1   1   1  25   0   0]
 [  3   0   5  12   4   6 176   1   5   2   3   2   2   2   1   4  17   5]
 [  1   0   1   4   3   2   6 156  19  11   7   9   1   0   0   1   0   1]
 [  2   0   1   7   8   1   1  50 116  23  11   9   1   1   0   3   0   0]
 [  0   0   3  12   6   6   2  39  32 120  14  10   1   0   0   8   0   4]
 [  0   0   2  18  18   2   0  20  22   8 126  15   1   0   0   6   3   1]
 [  1   0   3  16  23   1   4  32  33   8  17  66   2   1   1   0   2   4]
 [ 29   1  39   1   0   2   4   1   0   2   1   1 139   7   1   0   0   8]
 [  2  

In [276]:
knn_model_gs.fit(X_trainf, y_trainf)

GridSearchCV(cv=KFold(n_splits=10, random_state=None, shuffle=True),
             estimator=KNeighborsClassifier(),
             param_grid={'leaf_size': [20, 30, 40], 'n_neighbors': [5, 10, 20]},
             scoring='accuracy')

In [277]:
knn_best_classifier = knn_model_gs.best_estimator_

In [278]:
print(knn_model_gs.best_params_)

{'leaf_size': 20, 'n_neighbors': 5}


In [279]:
knn_model_gs.cv_results_

{'mean_fit_time': array([0.00888319, 0.00873027, 0.00870836, 0.00867386, 0.00868702,
        0.00870235, 0.0086098 , 0.00885611, 0.00869458]),
 'std_fit_time': array([0.00076308, 0.00030818, 0.0002145 , 0.00043945, 0.00050345,
        0.00053852, 0.00052041, 0.00035204, 0.00049316]),
 'mean_score_time': array([0.27608943, 0.27114155, 0.27415853, 0.27216001, 0.27264791,
        0.27483385, 0.27171485, 0.26939268, 0.27440531]),
 'std_score_time': array([0.00856027, 0.00595158, 0.00607182, 0.00456605, 0.00597556,
        0.00721506, 0.00407688, 0.00615421, 0.00657579]),
 'param_leaf_size': masked_array(data=[20, 20, 20, 30, 30, 30, 40, 40, 40],
              mask=[False, False, False, False, False, False, False, False,
                    False],
        fill_value='?',
             dtype=object),
 'param_n_neighbors': masked_array(data=[5, 10, 20, 5, 10, 20, 5, 10, 20],
              mask=[False, False, False, False, False, False, False, False,
                    False],
        fill_va

In [280]:
knn_best_classifier.get_params()

{'algorithm': 'auto',
 'leaf_size': 20,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [281]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(knn_best_classifier, X_trainf, y_trainf, cv=my_cv, scoring='accuracy')
list(scores)

[0.6529821843532145,
 0.6271317829457365,
 0.6581395348837209,
 0.6496124031007752,
 0.6511627906976745,
 0.6387596899224807,
 0.6356589147286822,
 0.6496124031007752,
 0.6511627906976745,
 0.6372093023255814]

In [282]:
y_train_predkf=knn_best_classifier.predict(X_trainf)

In [284]:
accuracy_score(y_true=y_trainf, y_pred=y_train_predkf)

0.7660646461514611

In [224]:
y_test_predkf = knn_best_classifier.predict(X_testf)

In [225]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_testf, y_test_predkf)


print("Confusion Matrix:")
print(cm)


Confusion Matrix:
[[165   7  17   0   0   0   3   0   0   0   0   1  16   1   1   0   1  13]
 [  1 215   1   0   0   0   0   0   0   0   0   0   3   2   2   0   0   5]
 [ 25   0 152   1   0   0   2   0   3   2   2   1  17   1   0   0   1  19]
 [  1   0   4 137  51   7   4   9   9  16   9   7   3   1   2   0   3   3]
 [  2   0   1  41 142   6   2   5  11  10  15  11   1   1   0   2   3   2]
 [  4   0   2   6   9 155   9   2   3   0   0   4   1   1   1  25   0   0]
 [  3   0   5  12   4   6 176   1   5   2   3   2   2   2   1   4  17   5]
 [  1   0   1   4   3   2   6 156  19  11   7   9   1   0   0   1   0   1]
 [  2   0   1   7   8   1   1  50 116  23  11   9   1   1   0   3   0   0]
 [  0   0   3  12   6   6   2  39  32 120  14  10   1   0   0   8   0   4]
 [  0   0   2  18  18   2   0  20  22   8 126  15   1   0   0   6   3   1]
 [  1   0   3  16  23   1   4  32  33   8  17  66   2   1   1   0   2   4]
 [ 29   1  39   1   0   2   4   1   0   2   1   1 139   7   1   0   0   8]
 [  2  

# SVM

In [226]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

svm_classifier = SVC()

param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf'], 'gamma': [0.1, 1, 10]}

grid_search = GridSearchCV(estimator=svm_classifier, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_trainf, y_trainf)

best_svm_classifier = grid_search.best_estimator_

best_svm_classifier.fit(X_trainf, y_trainf)

y_predf = best_svm_classifier.predict(X_testf)


In [228]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_testf, y_predf)
print("Accuracy:", accuracy)

Accuracy: 0.6614740757963264


# Decision Trees

In [230]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5],
    'max_features': [25, 'sqrt', 'log2'],
    'random_state': [42]
}

dt_classifier = DecisionTreeClassifier()

grid_search = GridSearchCV(estimator=dt_classifier,
                           param_grid=param_grid,
                           cv=my_cv,
                           scoring='accuracy',
                           verbose=1,
                           return_train_score=True)

grid_search.fit(X_trainf, y_trainf)

Fitting 5 folds for each of 432 candidates, totalling 2160 fits


Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]

Traceback (most recent call last):
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 903, in fit
    super().fit(
  File "C:\Users\chiri\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 289, in fit
    raise ValueError("max_features must be in (0, n_feature

GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=True),
             estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [None, 5, 10, 15],
                         'max_features': [25, 'sqrt', 'log2'],
                         'min_samples_leaf': [1, 2, 5],
                         'min_samples_split': [2, 5, 10], 'random_state': [42],
                         'splitter': ['best', 'random']},
             return_train_score=True, scoring='accuracy', verbose=1)

In [232]:
from sklearn.metrics import accuracy_score

best_dt_model = grid_search.best_estimator_
best_dt_model.fit(X_trainf, y_trainf)

y_preddtf = best_dt_model.predict(X_testf)

accuracy = accuracy_score(y_testf, y_preddtf)
print("Accuracy:", accuracy)

Accuracy: 0.5638223668914206


# Random Forests


In [234]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf_classifier = RandomForestClassifier()

rf_classifier.fit(X_trainf, y_trainf)

y_pred_rf = rf_classifier.predict(X_testf)

accuracy_rf = accuracy_score(y_testf, y_pred_rf)
print("Random Forest Accuracy:", accuracy_rf)

Random Forest Accuracy: 0.7684259474540804


In [241]:
from sklearn.ensemble import RandomForestClassifier

rf_classifier = RandomForestClassifier(n_estimators=500, max_depth=None, min_samples_split=10, min_samples_leaf=5, 
                                       max_features='auto', random_state=42)

rf_classifier.fit(X_trainf, y_trainf)
y_pred_rf = rf_classifier.predict(X_testf)

In [242]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_testf, y_pred_rf)

print("Accuracy: ", accuracy)

Accuracy:  0.7470355731225297


# Desicion Trees

In [243]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GroupKFold

my_cv = StratifiedShuffleSplit(n_splits=5, train_size=0.7, test_size=0.3)

In [244]:
dt_classifier = DecisionTreeClassifier()

In [245]:
my_param_grid = {'min_samples_leaf': [6, 10, 20, 40],
                 'min_weight_fraction_leaf': [0.01, 0.02, 0.05],
                 'criterion': ['entropy'],
                 'min_impurity_decrease': [1e-2, 7e-3]}

In [246]:
dt_model_gs = GridSearchCV(estimator=dt_classifier, 
                           param_grid=my_param_grid, 
                           cv=my_cv, 
                           scoring='accuracy',
                           verbose = 0,
                           return_train_score = True)

In [247]:
dt_model_gs.fit(X_trainf, y_trainf)

GridSearchCV(cv=StratifiedShuffleSplit(n_splits=5, random_state=None, test_size=0.3,
            train_size=0.7),
             estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['entropy'],
                         'min_impurity_decrease': [0.01, 0.007],
                         'min_samples_leaf': [6, 10, 20, 40],
                         'min_weight_fraction_leaf': [0.01, 0.02, 0.05]},
             return_train_score=True, scoring='accuracy')

In [248]:

dt_model_gs.best_params_

{'criterion': 'entropy',
 'min_impurity_decrease': 0.007,
 'min_samples_leaf': 6,
 'min_weight_fraction_leaf': 0.01}

In [249]:
dt_best_classifier = dt_model_gs.best_estimator_

In [250]:
dt_model_gs.cv_results_

{'mean_fit_time': array([0.44637995, 0.37175779, 0.26531577, 0.45848732, 0.38859358,
        0.27151303, 0.44843221, 0.37472215, 0.26606092, 0.49661303,
        0.38837976, 0.25806203, 0.44504237, 0.36951571, 0.25722904,
        0.44671726, 0.3699224 , 0.25815883, 0.4464323 , 0.36683884,
        0.25862193, 0.44758134, 0.37263079, 0.26145077]),
 'std_fit_time': array([0.01535571, 0.00426577, 0.00626331, 0.01593634, 0.01508031,
        0.00798791, 0.00928608, 0.01157923, 0.01156787, 0.03142598,
        0.01751826, 0.00360549, 0.00543521, 0.00492988, 0.00173819,
        0.0040759 , 0.00431846, 0.00243607, 0.00367373, 0.00426694,
        0.00349716, 0.00378315, 0.00787127, 0.00517093]),
 'mean_score_time': array([0.0157701 , 0.00365353, 0.00348926, 0.003793  , 0.00448494,
        0.00332341, 0.00402856, 0.00393915, 0.08646069, 0.0037291 ,
        0.00410318, 0.00345831, 0.00372763, 0.00347981, 0.00408564,
        0.00364604, 0.00350924, 0.00348182, 0.00382462, 0.00394711,
        0.003165

In [251]:
dt_best_classifier.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.007,
 'min_impurity_split': None,
 'min_samples_leaf': 6,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.01,
 'random_state': None,
 'splitter': 'best'}

In [252]:
dt_best_classifier.get_depth()

8

In [253]:
dt_best_classifier.get_n_leaves()

48

In [254]:
scores = cross_val_score(dt_best_classifier, X_train, y_train, cv=my_cv, scoring='accuracy')
list(scores)

[0.49909584086799275,
 0.49625419788168434,
 0.5040041332988892,
 0.5045207956600362,
 0.4879876001033325]

In [255]:
dt_best_classifier.score(X_train, y_train)

0.5156964576389427

# KNN

In [257]:
from sklearn.neighbors import KNeighborsClassifier
knn_classifier = KNeighborsClassifier()
my_param_grid = {'n_neighbors': [5, 10, 20], 'leaf_size': [20, 30, 40]}
knn_model_gs = GridSearchCV(estimator = knn_classifier, 
                            param_grid = my_param_grid,
                            cv = my_cv, 
                            scoring ='accuracy')
knn_model_gs.fit(X_trainf, y_trainf)

GridSearchCV(cv=StratifiedShuffleSplit(n_splits=5, random_state=None, test_size=0.3,
            train_size=0.7),
             estimator=KNeighborsClassifier(),
             param_grid={'leaf_size': [20, 30, 40], 'n_neighbors': [5, 10, 20]},
             scoring='accuracy')

In [258]:
knn_best_classifier = knn_model_gs.best_estimator_
print(knn_model_gs.best_params_)

{'leaf_size': 20, 'n_neighbors': 5}


In [259]:
knn_model_gs.cv_results_

{'mean_fit_time': array([0.00821424, 0.00821495, 0.00773201, 0.00797744, 0.00788116,
        0.00789647, 0.00777869, 0.0115767 , 0.01136408]),
 'std_fit_time': array([5.12437040e-04, 4.44188747e-04, 5.40210006e-04, 3.55363672e-06,
        1.95265428e-04, 1.62684050e-04, 3.98309000e-04, 3.37312182e-03,
        2.71534811e-03]),
 'mean_score_time': array([0.66125107, 0.66178765, 0.6680635 , 0.6548027 , 0.65978289,
        0.66180158, 0.65795627, 0.92855067, 0.90411515]),
 'std_score_time': array([0.02803561, 0.03065129, 0.03001899, 0.02304812, 0.02440995,
        0.02697454, 0.0249277 , 0.18469636, 0.14444125]),
 'param_leaf_size': masked_array(data=[20, 20, 20, 30, 30, 30, 40, 40, 40],
              mask=[False, False, False, False, False, False, False, False,
                    False],
        fill_value='?',
             dtype=object),
 'param_n_neighbors': masked_array(data=[5, 10, 20, 5, 10, 20, 5, 10, 20],
              mask=[False, False, False, False, False, False, False, False,

In [260]:
knn_best_classifier.get_params()

{'algorithm': 'auto',
 'leaf_size': 20,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [261]:
scores = cross_val_score(knn_best_classifier, X_train, y_train, cv=my_cv, scoring='accuracy')
list(scores)

[0.6357530353913717,
 0.6427279772668562,
 0.6311030741410488,
 0.6249031258072849,
 0.6331697235856368]

In [262]:
y_train_pred=knn_best_classifier.predict(X_train)

In [263]:
accuracy_score(y_true=y_train, y_pred=y_train_pred)

0.7660646461514611

# Logistic Regression

In [264]:
from sklearn.linear_model import LogisticRegression
import warnings

warnings.filterwarnings('ignore')

In [265]:
lr_classifier = LogisticRegression(verbose = 0)

In [266]:
my_param_grid = {'C': [10, 50, 100, 200]}

In [267]:
lr_model_gs = GridSearchCV(estimator=lr_classifier, 
                           param_grid=my_param_grid, 
                           cv=my_cv, 
                           scoring = 'accuracy')

In [268]:
lr_model_gs.fit(X_train, y_train)

GridSearchCV(cv=StratifiedShuffleSplit(n_splits=5, random_state=None, test_size=0.3,
            train_size=0.7),
             estimator=LogisticRegression(),
             param_grid={'C': [10, 50, 100, 200]}, scoring='accuracy')

In [269]:
lr_best_classifier = lr_model_gs.best_estimator_

In [270]:
lr_best_classifier.get_params()

{'C': 10,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [271]:
scores = cross_val_score(lr_best_classifier, X_train, y_train, cv=my_cv, scoring='accuracy')
list(scores)

[0.46861276156032033,
 0.4631878067682769,
 0.4709377421854818,
 0.4794626711444071,
 0.49082924308964093]

In [272]:
y_train_pred=lr_best_classifier.predict(X_train)

In [273]:
accuracy_score(y_true=y_train, y_pred=y_train_pred)

0.48073792729245796

# SVM

In [274]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score


svm_classifier = SVC()


param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}


grid_search = GridSearchCV(svm_classifier, param_grid, scoring='accuracy')
grid_search.fit(X_trainf, y_trainf)
best_classifier = grid_search.best_estimator_


print("Best parameters:", grid_search.best_params_)


y_pred_svm = best_classifier.predict(X_testf)


accuracy = accuracy_score(y_testf, y_pred_svm)
print("Accuracy:", accuracy)

Best parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 0.5728900255754475
