In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score

In [None]:
# Load the anemia dataset (a CSV expected in the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='anemia.csv')

In [None]:
# Display the (rows, columns) size of the loaded frame.
df.shape

In [None]:
# Preview the first rows to check column names and value formats.
df.head()

In [None]:
# Summary statistics (count, mean, std, quartiles) as computed by pandas' default describe().
df.describe()

In [None]:
# Print column dtypes and non-null counts to spot missing values.
df.info()

# Data splitting

In [None]:
# Feature matrix: the five predictor columns, kept as a DataFrame so the
# fitted models remember the column names.
x = df[['Gender', 'Hemoglobin', 'MCH', 'MCHC', 'MCV']]

# Target as a 1-D Series. Selecting with a list (df[['Result']]) yields an
# (n, 1) DataFrame, which makes scikit-learn emit a DataConversionWarning
# on every fit because it expects y of shape (n_samples,).
y = df['Result']

In [None]:
# Shape of the feature matrix (rows, feature columns).
x.shape

In [None]:
# Shape of the target; its row count should match x.
y.shape

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Model selection

In [None]:
# Two baseline classifiers to compare on the same split.
# The forest is seeded: tree bootstrapping and feature sub-sampling are
# random, so without random_state its accuracy changes on every run.
rf = RandomForestClassifier(random_state=42)
lr = LogisticRegression()

In [None]:
# Fit the random forest on the training split and score it on the held-out rows.
# np.ravel flattens the target to 1-D (n_samples,), the shape scikit-learn
# expects; a single-column frame triggers a DataConversionWarning instead.
rf.fit(x_train, np.ravel(y_train))
y_pred = rf.predict(x_test)
accuracy_score(np.ravel(y_test), y_pred)

In [None]:
# Fit logistic regression on the training split and score it on the held-out rows.
# np.ravel flattens the target to 1-D (n_samples,), the shape scikit-learn
# expects; a single-column frame triggers a DataConversionWarning instead.
# Note: y_pred is reused here and now holds the logistic-regression predictions.
lr.fit(x_train, np.ravel(y_train))
y_pred = lr.predict(x_test)
accuracy_score(np.ravel(y_test), y_pred)

In [None]:
# Mean 10-fold cross-validated accuracy of a default logistic regression on
# the full dataset. cross_val_score is imported with the other imports at the
# top of the notebook; the target is flattened to 1-D so each fold fits
# without a DataConversionWarning.
np.mean(cross_val_score(LogisticRegression(), x, np.ravel(y), cv=10, scoring="accuracy"))

# Hyperparameter tuning with GridSearchCV

In [None]:
# Regularisation strengths shared by every penalised configuration.
c_values = np.logspace(-4, 4, 10)

# Each penalty is paired with a solver that supports it. LogisticRegression's
# default 'lbfgs' solver only handles L2 or no penalty, so listing 'l1' and
# 'elasticnet' without a solver makes those fits fail and score NaN.
param_grid = [
    {'penalty': ['l2'], 'solver': ['lbfgs'], 'C': c_values},
    {'penalty': ['l1'], 'solver': ['liblinear'], 'C': c_values},
    # Elastic net is only implemented by 'saga' and requires an l1_ratio.
    {'penalty': ['elasticnet'], 'solver': ['saga'], 'l1_ratio': [0.5], 'C': c_values},
    # Unpenalised model: C has no effect, so it is not swept. None replaces
    # the string 'none', deprecated in scikit-learn 1.2 and removed in 1.4.
    {'penalty': [None], 'solver': ['lbfgs']},
]

In [None]:
# Exhaustive search over param_grid with 3-fold cross-validation on all cores.
# GridSearchCV is imported with the other imports at the top of the notebook.
# The search clones `lr` for every candidate, so the baseline model fitted
# earlier is left untouched.
clf = GridSearchCV(
    estimator=lr,
    param_grid=param_grid,
    cv=3,
    verbose=True,
    n_jobs=-1,
)
clf

In [None]:
# Run the search on the full dataset. fit() returns the search object itself,
# so best_clf and clf refer to the same fitted GridSearchCV.
# The target is flattened to 1-D so every candidate fit sees y of shape
# (n_samples,) rather than a column vector.
best_clf = clf.fit(x, np.ravel(y))


In [None]:
# Estimator refitted with the best-scoring parameter combination.
best_clf.best_estimator_

In [None]:
# Mean cross-validated score of the best parameter combination.
best_clf.best_score_

# Monte Carlo Simulation

In [None]:
# Sample mean and standard deviation of each continuous feature; these
# parameterise the normal distributions drawn from in the simulation below.
mean_hemo, std_hemo = x["Hemoglobin"].mean(), x["Hemoglobin"].std()
mean_mch, std_mch = x["MCH"].mean(), x["MCH"].std()
mean_mchc, std_mchc = x["MCHC"].mean(), x["MCHC"].std()
mean_mcv, std_mcv = x["MCV"].mean(), x["MCV"].std()
               

In [None]:
# Number of synthetic samples drawn in the Monte Carlo run.
num_simulations = 10_000

In [None]:
# Seeded generator so the Monte Carlo draw, and every number derived from it,
# is reproducible across kernel restarts.
rng = np.random.default_rng(42)

# Gender is drawn uniformly from {0, 1}.
# NOTE(review): assumes Gender is encoded as 0/1 in the dataset; confirm.
simulated_gender = rng.integers(0, 2, size=num_simulations)

# Each continuous feature is drawn independently from a normal distribution
# with the sample mean/std, so correlations between features are not kept.
simulated_hemo = rng.normal(mean_hemo, std_hemo, num_simulations)
simulated_mch = rng.normal(mean_mch, std_mch, num_simulations)
simulated_mchc = rng.normal(mean_mchc, std_mchc, num_simulations)
simulated_mcv = rng.normal(mean_mcv, std_mcv, num_simulations)


In [None]:
# Assemble the simulated features into a frame whose columns match the
# training features (same names, same order). The models were fitted on a
# DataFrame, so predicting on a bare ndarray emits a "X does not have valid
# feature names" warning and gives no protection against column mix-ups.
x_simulated = pd.DataFrame(
    np.column_stack([
        simulated_gender,
        simulated_hemo,
        simulated_mch,
        simulated_mchc,
        simulated_mcv,
    ]),
    columns=x.columns,
)

In [None]:
# Classify every simulated sample with the baseline logistic regression `lr`
# (not the grid-searched estimator).
score_predictions = lr.predict(X=x_simulated)

In [None]:
# Display the predicted labels for the simulated samples.
score_predictions

In [None]:
# Number of simulated samples predicted as class 1.
is_positive = score_predictions == 1
count_ones = int(is_positive.sum())
count_ones


In [None]:
# Share of simulated samples predicted as class 1.
# Despite the name, this is a fraction in [0, 1], not a value out of 100.
percentage=count_ones/num_simulations
percentage

In [None]:
# Mean 10-fold cross-validated accuracy of the configuration chosen by the
# grid search. Using best_estimator_ avoids hard-coding the winning
# parameters: the previous literal penalty='none' is rejected by
# scikit-learn >= 1.4, and C is ignored when no penalty is applied.
# cross_val_score clones the estimator for each fold, so the fitted
# best_estimator_ itself is not modified. The target is flattened to 1-D.
np.mean(cross_val_score(best_clf.best_estimator_, x, np.ravel(y), cv=10, scoring="accuracy"))