# Web Scraping

In [None]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from urllib.request import urlopen
import time
import random
import os

## Obtaining Last 20 years of Draft Results

In [None]:
# The 20 most recent draft classes eligible for scraping. Classes from 2020-2023 are
# excluded because their four-year rookie deals are not yet complete.
YEARS = [*range(2000, 2020)]

## College Results

If this notebook needs to be rerun and the previously scraped data can no longer be fetched because of request rate limits, reload the wr and rb dataframes saved to the drive.

In [None]:
# Reload the RB draft results previously saved to Drive instead of re-scraping them.
rb_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data-Collection/rb/rb_draft_output.csv')

## Consolidating Dataset

The datasets will be merged into a single wr dataset and a single rb dataset. The set of players will be limited to the rbs and wrs drafted into the NFL. The combine data will be left-merged onto this draft dataset.

The fantasy dataset will be prepped first, since we will not need to predict the specifics of how each player will reach the projected fpts total, but rather just this individual total will be predicted.

How we choose the fantasy-points (fpts) target matters. We could predict total career fpts; however, that target rewards role players who stay in the league a long time without ever producing a high peak more than players who have a high "ceiling". This model instead seeks to predict a player's peak, since players who spend few years in the league but are highly successful in those years are more valuable than consistent but low-scoring players.

The predicted column will combine how many successful years a player has with how successful the player is in those years. We will use the number of top-24 seasons a player produces in his career. To earn a top-24 season, a player must finish in the top 24 at his position in cumulative fantasy points scored that season. Since there are 32 teams, this statistic typically implies that the player is not only the best wide receiver or running back on his team, but is also almost a top-2/3 starting running back or wide receiver in the league.

In [None]:
# Collect each season's top-24 fantasy running backs so that the number of
# top-24 seasons can later be counted per player.
rb_fantasy = []

# One fantasy results file per season, 2001 through 2021 inclusive
for year in range(2001, 2022):
    file_name = f"/content/drive/MyDrive/Colab Notebooks/Data-Collection/Fantasy/{year}.csv"
    season_df = pd.read_csv(file_name)

    # Keep running backs only. A dedicated local name is used here so the
    # draft dataframe `rb_df` loaded earlier is not overwritten by this loop.
    season_rbs = season_df[season_df['Pos'] == 'RB']

    # Rank by fantasy points (highest first) and keep the 24 best rows,
    # retaining only the points and the player name.
    top_24 = (
        season_rbs.sort_values('FantasyPoints', ascending=False)
        .reset_index(drop=True)[['FantasyPoints', 'Player']]
        .head(24)
    )

    rb_fantasy.append(top_24)


In [None]:
file_path = '/content/drive/MyDrive/Colab Notebooks/Data-Collection/Fantasy/'

# Stack the per-season top-24 frames into one long frame
rb_fantasy = pd.concat(rb_fantasy)

# Every appearance of a player in the stacked frame is one top-24 season
rb_counts = rb_fantasy['Player'].value_counts()

# Two-column summary: player name and number of top-24 seasons
rb_summary = pd.DataFrame({
    'Player': rb_counts.index,
    'Top_24_Seasons': rb_counts.values,
})

# Persist both the per-player summary and the stacked top-24 rows
rb_summary.to_csv(f"{file_path}RB_summary.csv", index=False)
rb_fantasy.to_csv(f"{file_path}RB_top_24_players.csv", index=False)

## Merging Datasets

In [None]:
# Combine the draft results with the top-24 season counts and the combine
# measurements, then load the per-season college stats.
# NOTE(review): every join below matches on 'Player' name alone, so two players
# sharing a name would be cross-matched - confirm names are unique in the inputs.

# Directory holding the fantasy summary csv files
file_path = '/content/drive/MyDrive/Colab Notebooks/Data-Collection/Fantasy/'

# Inputs: top-24 season counts, draft results and combine measurements
rb_summary = pd.read_csv(f"{file_path}RB_summary.csv")
rb_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data-Collection/rb/rb_draft_output.csv')
rb_combine = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data-Collection/rb/rb_combine_output.csv')

# Left-join the season counts onto the draft list; drafted players with no
# match in the summary get a count of 0.
rb_merged = rb_df.merge(rb_summary, on='Player', how='left')
rb_merged['Top_24_Seasons'] = rb_merged['Top_24_Seasons'].fillna(0)

# Left-join the combine measurements onto the draft rows
rb_merged_combine = rb_merged.merge(rb_combine, on='Player', how='left')

# College stats: the first csv column is renamed to 'CollegeYear' and shifted up by one
rb_college_total = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data-Collection/rb/rb_college_output.csv')
first_column = rb_college_total.columns[0]
rb_college_total = rb_college_total.rename(columns={first_column: "CollegeYear"})
rb_college_total['CollegeYear'] = rb_college_total['CollegeYear'].add(1)

rb_college_total.head()




Unnamed: 0,CollegeYear,School,Conf,Class,Pos,G,Att,Yds,Avg,TD,Rec,Yds.1,Avg.1,TD.1,Plays,Yds.2,Avg.2,TD.2,Player
0,1,Virginia,ACC,,RB,11.0,36.0,205.0,5.7,4.0,4.0,26.0,6.5,0.0,40,231,5.8,4,Thomas Jones
1,2,Virginia,ACC,,RB,11.0,201.0,692.0,3.4,4.0,17.0,127.0,7.5,1.0,218,819,3.8,5,Thomas Jones
2,3,Virginia,ACC,,RB,11.0,238.0,1303.0,5.5,13.0,28.0,179.0,6.4,2.0,266,1482,5.6,15,Thomas Jones
3,4,Virginia,ACC,,RB,11.0,334.0,1798.0,5.4,16.0,22.0,239.0,10.9,1.0,356,2037,5.7,17,Thomas Jones
4,1,Wisconsin,Big Ten,,RB,13.0,325.0,2109.0,6.5,21.0,14.0,133.0,9.5,0.0,339,2242,6.6,21,Ron Dayne


In [None]:
# Display the dataset feature variable types and null values, one row per statistic.
# pd.concat is used because DataFrame.append was deprecated and then removed in pandas 2.0.
null_counts = rb_college_total.isnull().sum()
tab_info = pd.concat([
    pd.DataFrame(rb_college_total.dtypes).T.rename(index={0: 'Variable Type'}),
    pd.DataFrame(null_counts).T.rename(index={0: 'Null Values'}),
    pd.DataFrame(null_counts / rb_college_total.shape[0] * 100).T.rename(index={0: 'Null Value (%)'}),
])
tab_info

  tab_info=tab_info.append(pd.DataFrame(rb_college_total.isnull().sum()).T.rename(index={0:'Null Values'}))
  tab_info = tab_info.append(pd.DataFrame(rb_college_total.isnull().sum()/rb_college_total.shape[0]*100).T.rename(index={0:'Null Value (%)'}))


Unnamed: 0,CollegeYear,School,Conf,Class,Pos,G,Att,Yds,Avg,TD,Rec,Yds.1,Avg.1,TD.1,Plays,Yds.2,Avg.2,TD.2,Player
Variable Type,int64,object,object,object,object,float64,float64,float64,float64,float64,float64,float64,float64,float64,int64,int64,float64,int64,object
Null Values,0,0,0,145,0,2,12,12,15,12,64,64,78,64,0,0,11,0,0
Null Value (%),0.0,0.0,0.0,13.086643,0.0,0.180505,1.083032,1.083032,1.353791,1.083032,5.776173,5.776173,7.039711,5.776173,0.0,0.0,0.99278,0.0,0.0


To prepare the data for use in a Recurrent Neural Network, I do not need to consolidate a player's college years into one row; instead, each year can be kept as a separate row related to the player. However, the draft and combine information will be repeated in each of these rows.

Now the college-total dataset is left-merged onto the previous dataset, so that only players with their full college stats are included in the resulting dataset.

In [None]:
# Merge with College: one output row per (player, college season).
# drop_duplicates() returns a new frame, so it must be part of the assignment
# for duplicate rows to actually be removed.
rb_merged_college = (
    pd.merge(rb_merged_combine, rb_college_total, on='Player', how='left')
    .drop_duplicates()
    # Treat a CollegeYear of 0 as year 1
    .assign(CollegeYear=lambda merged: merged['CollegeYear'].replace(0, 1))
    # Rows with no 'Avg' value are discarded; this includes drafted players
    # that found no match in the college stats.
    .dropna(subset=['Avg'])
)
rb_merged_college.head()

Unnamed: 0,Unnamed: 0_x,Pick,Tm,Player,Pos_x,Age,To,AP1,PB,St,...,Avg,TD_y,Rec_y,Yds.1_y,Avg.1,TD.1_y,Plays,Yds.2_y,Avg.2,TD.2_y
1,6,7,ARI,Thomas Jones,RB,22.0,2011.0,0,1,9,...,5.7,4.0,4.0,26.0,6.5,0.0,40.0,231.0,5.8,4.0
2,6,7,ARI,Thomas Jones,RB,22.0,2011.0,0,1,9,...,3.4,4.0,17.0,127.0,7.5,1.0,218.0,819.0,3.8,5.0
3,6,7,ARI,Thomas Jones,RB,22.0,2011.0,0,1,9,...,5.5,13.0,28.0,179.0,6.4,2.0,266.0,1482.0,5.6,15.0
4,6,7,ARI,Thomas Jones,RB,22.0,2011.0,0,1,9,...,5.4,16.0,22.0,239.0,10.9,1.0,356.0,2037.0,5.7,17.0
5,10,11,NYG,Ron Dayne,RB,22.0,2007.0,0,0,1,...,6.5,21.0,14.0,133.0,9.5,0.0,339.0,2242.0,6.6,21.0


In [None]:
# Save the merged frame to Drive before any columns are dropped or imputed.
rb_merged_college.to_csv('/content/drive/MyDrive/Colab Notebooks/Data-Collection/rb/rb_merged_college_roughoutput1.csv', index=False)


In [None]:
# Columns removed from the merged frame before modelling
unused_columns = [
    'Unnamed: 0_x', 'Tm', 'Pos_x', 'To', 'AP1', 'PB', 'St', 'Cmp', 'Att_x', 'Yds_x', 'TD_x', 'Int', 'Solo',
    'Int.1', 'Sk', 'College/Univ', 'Unnamed: 28', 'Pos_y', 'School_x', 'College', 'Drafted (tm/rnd/yr)',
    'School_y', 'Conf', 'Class', 'Pos',
]
rb_merged_college = rb_merged_college.drop(columns=unused_columns)

In [None]:
# List the columns that remain after the drop above.
print(rb_merged_college.columns)

Index(['Pick', 'Player', 'Age', 'wAV', 'DrAV', 'G_x', 'Att.1', 'Yds.1_x',
       'TD.1_x', 'Rec_x', 'Yds.2_x', 'TD.2_x', 'Top_24_Seasons',
       'Unnamed: 0_y', 'Ht', 'Wt', '40yd', 'Vertical', 'Bench', 'Broad Jump',
       '3Cone', 'Shuttle', 'CollegeYear', 'G_y', 'Att_y', 'Yds_y', 'Avg',
       'TD_y', 'Rec_y', 'Yds.1_y', 'Avg.1', 'TD.1_y', 'Plays', 'Yds.2_y',
       'Avg.2', 'TD.2_y'],
      dtype='object')


In [None]:
def convert_to_inches(height):
    """Convert a height written as ``'feet-inches'`` into total inches.

    Args:
        height: A string such as ``'5-10'``, a value that is already numeric,
            or a missing value (``None``, NaN, or a blank string).

    Returns:
        The total number of inches as an ``int`` for a ``'feet-inches'``
        string. Non-string values (including NaN) are returned unchanged.
        ``None`` and blank strings are returned as NaN.

    Raises:
        ValueError: If a non-blank string is not of the form
            ``'<feet>-<inches>'`` with integer parts.
    """
    if height is None:
        return float('nan')
    # Already numeric (float, int, NaN, numpy scalar): nothing to convert.
    if not isinstance(height, str):
        return height

    text = height.strip()
    if not text:
        return float('nan')

    feet, inches = text.split('-')
    return (int(feet) * 12) + int(inches)

# Convert every 'Ht' entry to inches and store the column as floats
rb_merged_college['Ht'] = rb_merged_college['Ht'].map(convert_to_inches).astype(float)


## Impute Missing Values with KNNImputer and Linear Interpolation


In [None]:
from sklearn.impute import KNNImputer

# Gaps in the draft/body-measurement columns are filled by straight-line
# interpolation along the row order, including leading and trailing gaps.
# NOTE(review): neighbouring rows can belong to different players, so these
# values are interpolated across players - confirm this is intended.
for linear_col in ('Age', 'wAV', 'DrAV', 'Wt', 'Ht'):
    rb_merged_college[linear_col] = rb_merged_college[linear_col].interpolate(
        method='linear', limit_direction='both', axis=0
    )

# Column groups that are imputed together with k-nearest-neighbours
college_columns_to_interpolate = ['G_y', 'Att_y', 'Yds_y', 'Avg', 'TD_y', 'Rec_y', 'Yds.1_y', 'Avg.1', 'TD.1_y', 'Plays', 'CollegeYear']
combine_columns_to_interpolate = ['40yd', 'Vertical', 'Bench', 'Broad Jump', '3Cone', 'Shuttle']

# KNN imputer using the 3 nearest neighbours; it is refit for each column group
imputer = KNNImputer(n_neighbors=3)

for knn_columns in (college_columns_to_interpolate, combine_columns_to_interpolate):
    rb_merged_college[knn_columns] = imputer.fit_transform(rb_merged_college[knn_columns])

# Remove the remaining columns that are not used downstream
rb_merged_college = rb_merged_college.drop(
    columns=[
        'Unnamed: 0_y', 'TD.2_y', 'Avg.2', 'Yds.2_y', 'G_x', 'Att.1', 'Yds.1_x',
        'TD.1_x', 'Rec_x', 'Yds.2_x', 'TD.2_x',
    ]
)


In [None]:
# Give the college stat columns readable names (merge-suffixed name -> new name)
column_mapping = dict(zip(
    ('G_y', 'Att_y', 'Yds_y', 'Avg', 'TD_y', 'Rec_y', 'Yds.1_y', 'Avg.1', 'TD.1_y'),
    ('Games', 'Rush_Att', 'Rush_Yds', 'RushYds_Avg', 'Rush_TD', 'Rec', 'Rec_Yds', 'RecYds_Avg', 'Rec_TD'),
))

rb_merged_college = rb_merged_college.rename(column_mapping, axis='columns')

In [None]:
# Write the merged dataframes to csv files.
# drop_duplicates() returns a new frame rather than modifying in place, so the
# result has to be assigned back for duplicate rows to be removed before saving.
rb_merged_college = rb_merged_college.drop_duplicates()

rb_merged_college.to_csv('/content/drive/MyDrive/Colab Notebooks/Data-Collection/rb/rb_college_summaryRNN.csv', index=False)
# wr_merged_combine.to_csv('/content/drive/MyDrive/Colab Notebooks/Data-Collection/wr/wr_combine_summary.csv', index=False)

In [None]:
# Preview the first rows of the cleaned, renamed dataset.
rb_merged_college.head()

Unnamed: 0,Pick,Player,Age,wAV,DrAV,Top_24_Seasons,Ht,Wt,40yd,Vertical,...,Games,Rush_Att,Rush_Yds,RushYds_Avg,Rush_TD,Rec,Rec_Yds,RecYds_Avg,Rec_TD,Plays
1,7,Thomas Jones,22.0,62.0,7.0,6.0,70.0,216.0,4.45,35.5,...,11.0,36.0,205.0,5.7,4.0,4.0,26.0,6.5,0.0,40.0
2,7,Thomas Jones,22.0,62.0,7.0,6.0,70.0,216.0,4.45,35.5,...,11.0,201.0,692.0,3.4,4.0,17.0,127.0,7.5,1.0,218.0
3,7,Thomas Jones,22.0,62.0,7.0,6.0,70.0,216.0,4.45,35.5,...,11.0,238.0,1303.0,5.5,13.0,28.0,179.0,6.4,2.0,266.0
4,7,Thomas Jones,22.0,62.0,7.0,6.0,70.0,216.0,4.45,35.5,...,11.0,334.0,1798.0,5.4,16.0,22.0,239.0,10.9,1.0,356.0
5,11,Ron Dayne,22.0,23.0,10.0,0.0,71.0,259.0,4.65,35.0,...,13.0,325.0,2109.0,6.5,21.0,14.0,133.0,9.5,0.0,339.0


In [None]:
# Re-check dtypes and null counts after imputation, one row per statistic.
# pd.concat is used because DataFrame.append was deprecated and then removed in pandas 2.0.
null_counts2 = rb_merged_college.isnull().sum()
tab_info2 = pd.concat([
    pd.DataFrame(rb_merged_college.dtypes).T.rename(index={0: 'Variable Type'}),
    pd.DataFrame(null_counts2).T.rename(index={0: 'Null Values'}),
    pd.DataFrame(null_counts2 / rb_merged_college.shape[0] * 100).T.rename(index={0: 'Null Value (%)'}),
])
tab_info2

  tab_info2=tab_info2.append(pd.DataFrame(rb_merged_college.isnull().sum()).T.rename(index={0:'Null Values'}))
  tab_info2 = tab_info2.append(pd.DataFrame(rb_merged_college.isnull().sum()/rb_merged_college.shape[0]*100).T.rename(index={0:'Null Value (%)'}))


Unnamed: 0,Pick,Player,Age,wAV,DrAV,Top_24_Seasons,Ht,Wt,40yd,Vertical,...,Games,Rush_Att,Rush_Yds,RushYds_Avg,Rush_TD,Rec,Rec_Yds,RecYds_Avg,Rec_TD,Plays
Variable Type,int64,object,float64,float64,float64,float64,float64,float64,float64,float64,...,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64
Null Values,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Null Value (%),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Reload the per-season RB dataset from Drive
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data-Collection/rb/rb_college_summaryRNN.csv')

# Binary label 'Top_24_Season': 1 when the player has at least one top-24
# season, otherwise 0. The original count column is then removed.
df['Top_24_Season'] = (df['Top_24_Seasons'] >= 1).astype('int64')
df = df.drop(columns=['Top_24_Seasons'])

# Fixed column order: identifiers first, label last
ordered_columns = [
    'Player', 'CollegeYear', 'Pick', 'Age', 'wAV', 'DrAV', 'Ht', 'Wt', '40yd', 'Vertical',
    'Bench', 'Broad Jump', '3Cone', 'Shuttle', 'Games', 'Rush_Att', 'Rush_Yds',
    'RushYds_Avg', 'Rush_TD', 'Rec', 'Rec_Yds', 'RecYds_Avg', 'Rec_TD', 'Plays',
    'Top_24_Season',
]

# Order rows so that each player's seasons are contiguous and chronological
df = df[ordered_columns].sort_values(['Player', 'CollegeYear'])

print(len(df))




1113


In [None]:
# Create sequences of input data
def create_sequences(df, sequence_length):
    """Build sliding windows of consecutive rows for every player.

    Rows are grouped by the 'Player' column and, within each group, every run
    of ``sequence_length`` consecutive rows (in the frame's existing row
    order) becomes one sequence.

    Args:
        df: DataFrame with a 'Player' column.
        sequence_length: Number of consecutive rows in each window.

    Returns:
        A NumPy array of the stacked windows, each holding the raw row values
        of all columns. When at least one window exists the shape is
        ``(num_windows, sequence_length, num_columns)``. Players with fewer
        than ``sequence_length`` rows contribute no windows.
    """
    sequences = []
    for _, group in df.groupby('Player'):
        rows = group.values
        # A group of n rows holds n - sequence_length + 1 windows. The "+ 1"
        # keeps the final window, so a player with exactly `sequence_length`
        # rows still yields one sequence.
        for start in range(len(rows) - sequence_length + 1):
            sequences.append(rows[start:start + sequence_length])
    return np.array(sequences)

In [None]:
# Show the column order; the windowing code below slices columns by position.
print(df.columns)

Index(['Player', 'CollegeYear', 'Pick', 'Age', 'wAV', 'DrAV', 'Ht', 'Wt',
       '40yd', 'Vertical', 'Bench', 'Broad Jump', '3Cone', 'Shuttle', 'Games',
       'Rush_Att', 'Rush_Yds', 'RushYds_Avg', 'Rush_TD', 'Rec', 'Rec_Yds',
       'RecYds_Avg', 'Rec_TD', 'Plays', 'Top_24_Season'],
      dtype='object')


In [None]:
# # Normalize the input data
# scaler = MinMaxScaler()
# df[df.columns[2:]] = scaler.fit_transform(df[df.columns[2:]])

# Number of consecutive college seasons in each sliding window
sequence_length = 2

sequences = create_sequences(df, sequence_length)

# Copy every column after the first two ('Player', 'CollegeYear') into a float
# array of shape (num_sequences, sequence_length, num_columns - 2).
# NOTE(review): the last copied column is the label 'Top_24_Season', so the
# label is still part of this array.
n_kept_columns = df.shape[1] - 2
padded_sequences = np.zeros((len(sequences), sequence_length, n_kept_columns))
for row, window in enumerate(sequences):
    padded_sequences[row, :len(window), :] = window[:, 2:]



In [None]:


# # Split the data into training and test sets
# split_idx = int(0.8 * len(padded_sequences))
# train_sequences = padded_sequences[:split_idx]
# train_targets = targets[:split_idx]
# test_sequences = padded_sequences[split_idx:]
# test_targets = targets[split_idx:]

# # Reshape the data
# train_sequences = np.reshape(train_sequences, (train_sequences.shape[0], sequence_length, df.shape[1]-2))
# test_sequences = np.reshape(test_sequences, (test_sequences.shape[0], sequence_length, df.shape[1]-2))

# # Print the shapes of the data
# print('Train sequences shape:', train_sequences.shape)
# print('Train targets shape:', train_targets.shape)
# print('Test sequences shape:', test_sequences.shape)
# print('Test targets shape:', test_targets.shape)

In [None]:
# import tensorflow as tf
# from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

# # Define RNN architecture
# model = tf.keras.Sequential([
#     tf.keras.layers.LSTM(64, input_shape=(sequence_length, df.shape[1]-2), return_sequences=True),
#     tf.keras.layers.LSTM(32),
#     tf.keras.layers.Dense(1, activation='sigmoid')
# ])

# # Compile model
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# # Split data into training and testing sets
# train_size = int(0.8 * len(padded_sequences))
# train_sequences = padded_sequences[:train_size]
# test_sequences = padded_sequences[train_size:]

# train_targets = df.groupby('Player').first().loc[df.loc[train_sequences[:,0,0],'Player']]['Top_24_Season'].values

# test_targets = df.groupby('Player').first().loc[df.loc[test_sequences[:,0,0],'Player']]['Top_24_Season'].values
# print(test_targets)



# # # Create dictionary mapping players to target variables
# # player_targets = {}
# # for player in df['Player'].unique():
# #     target = df.loc[(df['Player'] == player), 'Top_24_Season'].iloc[0]
# #     player_targets[player] = target

# # print(player_targets.keys())
# # print(train_sequences[:,0,0])

# # # Retrieve targets for training and testing sets
# # train_targets = [player_targets[player] for player in train_sequences[:,0,0]]
# # test_targets = [player_targets[player] for player in test_sequences[:,0,0]]






In [None]:
import tensorflow as tf
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

from tensorflow.keras.layers import Dropout

# padded_sequences holds every df column after 'Player' and 'CollegeYear', so
# it still contains the label column 'Top_24_Season'. Split it into labels and
# model inputs so the label is not fed to the network as a feature.
label_idx = df.columns.get_loc('Top_24_Season') - 2

# The label is set per player, so the first timestep of each window carries
# the label for the whole window. Reading it from the window itself keeps
# labels aligned with sequences; looking it up through
# df.loc[<first feature value>, 'Player'] used the draft pick number as a row
# label and returned labels of unrelated rows.
targets = padded_sequences[:, 0, label_idx].astype(int)
features = np.delete(padded_sequences, label_idx, axis=2)

# Define RNN architecture with Dropout regularization
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, input_shape=(sequence_length, features.shape[2]), return_sequences=True),
    Dropout(0.2),
    tf.keras.layers.LSTM(32),
    Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Split data into training and testing sets (first 80% / last 20%).
# NOTE(review): df is sorted by player name, so this split is alphabetical and
# one player's windows can fall on both sides of the boundary - consider a
# shuffled, player-level split.
train_size = int(0.8 * len(features))
train_sequences = features[:train_size]
test_sequences = features[train_size:]

train_targets = targets[:train_size]
test_targets = targets[train_size:]

# Train model
model.fit(train_sequences, train_targets, epochs=10, batch_size=32)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9d8aee5df0>

In [None]:
# # Train model
# model.fit(train_sequences, train_targets, epochs=10, batch_size=32)

# # Predict on test data
# y_pred = model.predict(test_sequences)
# y_pred_binary = (y_pred > 0.5).astype(int)

# # Calculate evaluation metrics
# confusion = confusion_matrix(test_targets, y_pred_binary)
# accuracy = accuracy_score(test_targets, y_pred_binary)
# precision = precision_score(test_targets, y_pred_binary)
# recall = recall_score(test_targets, y_pred_binary)

# # Print results
# print('Confusion matrix:')
# print(confusion)
# print('Accuracy:', accuracy)
# print('Precision:', precision)
# print('Recall:', recall)

In [None]:
# Turn the predicted probabilities into class labels using a 0.5 threshold
y_pred = model.predict(test_sequences)
y_pred_binary = (y_pred > 0.5).astype(int)

# Score the predictions against the held-out labels
confusion = confusion_matrix(test_targets, y_pred_binary)
accuracy, precision, recall, f1 = (
    metric(test_targets, y_pred_binary)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report
print('Confusion matrix:')
print(confusion)
for label, value in (
    ('Accuracy:', accuracy),
    ('Precision:', precision),
    ('Recall:', recall),
    ('F1-score:', f1),
):
    print(label, value)

Confusion matrix:
[[56  0]
 [39  0]]
Accuracy: 0.5894736842105263
Precision: 0.0
Recall: 0.0
F1-score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
