In [4]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


df = pd.read_csv('PK COVID-19-10may1.csv')
x = df[['Cases', 'Deaths', 'Recovered']]
y = df['Province']
y = pd.get_dummies(y, columns=['Province'])  # Convert categorical variable into dummy/indicator variables


from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split


x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

models = [MultiOutputClassifier(LogisticRegression()), MultiOutputClassifier(SVC()), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(x_train, y_train)  # Indent here
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    models_scores.append([model_name,accuracy])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Accuracy Score: ",f'{model[0]} {model[1]:.2f}')


Accuracy Score:  KNN 0.18
Accuracy Score:  Decision Tree 0.16
Accuracy Score:  Random Forest 0.16
Accuracy Score:  Logistic Regression 0.05
Accuracy Score:  SVM 0.00


In [6]:
models = [MultiOutputClassifier(LogisticRegression()), MultiOutputClassifier(SVC()), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(x_train, y_train)  # Indent here
    y_pred = model.predict(x_test)
    Precision = precision_score(y_test, y_pred,average='micro')
    models_scores.append([model_name,Precision])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Precision Score: ", f'{model[0]} : {model[1]:.2f}')

Precision Score:  Logistic Regression : 0.52
Precision Score:  SVM : 0.50
Precision Score:  KNN : 0.48
Precision Score:  Random Forest : 0.40
Precision Score:  Decision Tree : 0.39


In [8]:
models = [MultiOutputClassifier(LogisticRegression()), MultiOutputClassifier(SVC()), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(x_train, y_train)  # Indent here
    y_pred = model.predict(x_test)
    Recall = recall_score(y_test, y_pred,average='micro')
    models_scores.append([model_name,Recall])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Recall Score: ",f'{model[0]} : {model[1]:.2f}')

Recall Score:  KNN : 0.18
Recall Score:  Decision Tree : 0.16
Recall Score:  Random Forest : 0.15
Recall Score:  Logistic Regression : 0.05
Recall Score:  SVM : 0.00


In [10]:
models = [MultiOutputClassifier(LogisticRegression()), MultiOutputClassifier(SVC()), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(x_train, y_train)  # Indent here
    y_pred = model.predict(x_test)
    F1 = f1_score(y_test, y_pred,average='micro')
    models_scores.append([model_name,F1])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("F1 Score: ",f'{model[0]} : {model[1]:.2f}')


F1 Score:  KNN : 0.26
F1 Score:  Decision Tree : 0.22
F1 Score:  Random Forest : 0.22
F1 Score:  Logistic Regression : 0.09
F1 Score:  SVM : 0.01


In [None]:
models = [MultiOutputClassifier(LogisticRegression()), MultiOutputClassifier(SVC()), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(x_train, y_train)  # Indent here
    y_pred = model.predict(x_test)
    F1 = f1_score(y_test, y_pred,average='micro')
    models_scores.append([model_name,F1])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("F1 Score: ",f'{model[0]} : {model[1]:.2f}')


In [12]:
# libraries and model import
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
# define the models to be evaluated
models = [MultiOutputClassifier(LogisticRegression()), KNeighborsClassifier(), RandomForestClassifier(), MultiOutputClassifier(SVC()),DecisionTreeClassifier()]
names = ["Logistic Regression", "KNN", "Random Forest", "SVM"]
# perform k-fold cross-validation for each model
k = 10

for name, model in zip(names, models):
    # Indent the following lines for the for loop's body
    cv_scores = cross_val_score(model, x_train, y_train, cv=k)
    print(f"{name} CV accuracy: {np.mean(cv_scores):.3f} +/- {np.std(cv_scores):.3f}")


Logistic Regression CV accuracy: 0.050 +/- 0.016
KNN CV accuracy: 0.206 +/- 0.045
Random Forest CV accuracy: 0.188 +/- 0.023
SVM CV accuracy: 0.007 +/- 0.006


In [None]:
from sklearn.model_selection import GridSearchCV
# Define the parameter grid
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf'], 'gamma': [0.1, 1, 10]}
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(x_train, y_train)
print("Best parameters: {}".format(grid_search.best_params_))
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))


In [23]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier

# Assuming x_train, y_train are your training data and labels

# Define the parameter grid
param_grid = {
    'estimator__C': [0.1, 1, 10],
    'estimator__kernel': ['linear', 'rbf'],
    'estimator__gamma': [0.1, 1, 10]
}

# Create the multi-output classifier with SVM
svm_multi_output = MultiOutputClassifier(SVC())

# Create GridSearchCV object
grid_search = GridSearchCV(svm_multi_output, param_grid, cv=5, scoring='precision_micro')

# Fit the model to find the best parameters
grid_search.fit(x_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_

# Print the best parameters
print(f"Best Parameters: {best_params}")

# Predictions using the best model
y_pred = grid_search.predict(x_test)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Best Parameters: {'estimator__C': 0.1, 'estimator__gamma': 10, 'estimator__kernel': 'rbf'}


In [21]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier
# Define the parameter grid
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf'], 'gamma': [0.1, 1, 10]}
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(x_train, y_train)
print("Best parameters: {}".format(grid_search.best_params_))
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))


TypeError: GridSearchCV.__init__() takes 3 positional arguments but 4 positional arguments (and 2 keyword-only arguments) were given