# Logistic Regression Analysis of Prakriti Dataset

## 1. Data Loading and Preprocessing

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Read the Prakriti feature table from disk into a DataFrame.
data_path = "Prakriti_With_Features.csv"
df = pd.read_csv(data_path)

# Quick sanity check: show the leading rows of the loaded table.
preview = df.head()
print(preview)

In [None]:
# Feature matrix: every column except the 'Dosha' target, with categorical
# columns expanded into one-hot indicator columns by pandas.
X = pd.get_dummies(df.drop('Dosha', axis=1))

# Binary target: 1 where 'Dosha' equals 'Pitta', 0 for every other value.
# A vectorized comparison replaces the per-row Python lambda; missing values
# compare unequal and therefore map to 0, exactly as before.
y = df['Dosha'].eq('Pitta').astype(int)

## 2. Splitting the Data

In [None]:
# Hold out a quarter of the rows for testing; the fixed seed keeps the
# partition reproducible between runs.
holdout_fraction = 0.25
split_seed = 0
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=holdout_fraction,
    random_state=split_seed,
)

## 3. Model Development and Prediction

In [None]:
# Build the logistic regression classifier (iteration cap raised to 1000)
# and train it on the training split; fit() returns the fitted estimator.
logreg = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Class predictions (0/1) for the held-out test rows.
y_pred = logreg.predict(X_test)
print("Predicted Test Results: ", y_pred)

## 4. Model Evaluation

### Confusion Matrix

In [None]:
# Build the confusion matrix for the test split (rows = actual class,
# columns = predicted class). Passing labels=[0, 1] pins the matrix to 2x2
# even if one of the classes is absent from y_test / y_pred.
cnf_matrix = metrics.confusion_matrix(y_test, y_pred, labels=[0, 1])

# Display names for the encoded classes: 0 -> 'Not Pitta', 1 -> 'Pitta'.
class_names = ['Not Pitta', 'Pitta']

# Label the frame itself: seaborn takes the tick labels from the DataFrame's
# index/columns and replaces any ticks set on the axes beforehand, so setting
# plt.xticks()/plt.yticks() before drawing the heatmap has no visible effect.
cnf_frame = pd.DataFrame(cnf_matrix, index=class_names, columns=class_names)

# Draw the annotated heatmap on an explicit axes object.
fig, ax = plt.subplots()
sns.heatmap(cnf_frame, annot=True, cmap="YlGnBu", fmt='g', ax=ax)
ax.xaxis.set_label_position("top")
ax.set_title('Confusion matrix', y=1.1)
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
# Run the layout pass only after the title and axis labels exist so that
# they are taken into account.
fig.tight_layout()
plt.show()

### Evaluation Metrics

In [None]:
# Report accuracy, precision and recall of the test-set predictions,
# one metric per line, in that order.
for metric_label, scorer in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(metric_label, scorer(y_test, y_pred))

### ROC Curve and AUC

In [None]:
# Predicted probability for the class in column 1 of predict_proba
# (i.e. logreg.classes_[1], the positive label) for each test row.
y_pred_proba = logreg.predict_proba(X_test)[:, 1]

# ROC curve points and the area under the curve for the test split.
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)

plt.plot(fpr, tpr, label="data 1, auc="+str(auc))
# Unlabelled diagonal: the curve a random classifier would produce, drawn
# as a visual baseline (it does not get a legend entry).
plt.plot([0, 1], [0, 1], linestyle="--", color="grey")
# Axis labels and title, matching the labelled confusion-matrix figure.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc=4)
plt.show()