In [7]:
# Greeting cell: prints a fixed hello line, then the stored name.
name = "Lordsfavour Anukam"  # echoed by the second print below

print("Hello, Healthcare!")
print("My name is", name)


Hello, Healthcare!
My name is Lordsfavour Anukam


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Step 1: Build a synthetic patient dataset
# Seed NumPy's global RNG so the random columns below are identical on every run.
np.random.seed(42)

# Single source of truth for the cohort size. Patient_ID and every random
# column must have exactly this many entries, otherwise pd.DataFrame rejects
# the dict because the columns have different lengths.
N_PATIENTS = 100

# NOTE: keep the draws in this order -- each call consumes the seeded global
# RNG stream, so reordering them changes the generated values.
# Every column is drawn independently of the others, so the target column
# (Diabetes) has no built-in relationship to the features.
data = {
    'Patient_ID': range(1, N_PATIENTS + 1),                     # 1..N_PATIENTS
    'Age': np.random.randint(18, 80, N_PATIENTS),               # integers in [18, 80)
    'Gender': np.random.choice(['Male', 'Female'], N_PATIENTS),
    'BMI': np.random.uniform(18.5, 40, N_PATIENTS),             # floats in [18.5, 40)
    'Blood_Pressure': np.random.randint(90, 180, N_PATIENTS),   # integers in [90, 180)
    'Cholesterol': np.random.randint(150, 300, N_PATIENTS),     # integers in [150, 300)
    'Smoking': np.random.choice(['Yes', 'No'], N_PATIENTS),
    'Diabetes': np.random.choice(['Yes', 'No'], N_PATIENTS)
}
df = pd.DataFrame(data)

# Display first few rows
print("First 5 rows of the dataset:")
print(df.head())

# Step 2: Data Cleaning
# Report the number of missing entries per column before anything is removed
print("\nMissing Values:")
print(df.isnull().sum())

# Lookup tables that turn the text categories into 0/1 flags
gender_codes = {'Male': 1, 'Female': 0}
yes_no_codes = {'Yes': 1, 'No': 0}

# One pipeline, same order as before:
#   1. drop rows that contain any missing value,
#   2. encode Gender / Smoking / Diabetes as integers
#      (a category absent from its lookup table would become NaN),
#   3. keep only rows whose BMI lies within 10..50 inclusive.
df = (
    df.dropna()
    .assign(
        Gender=lambda frame: frame['Gender'].map(gender_codes),
        Smoking=lambda frame: frame['Smoking'].map(yes_no_codes),
        Diabetes=lambda frame: frame['Diabetes'].map(yes_no_codes),
    )
    .loc[lambda frame: frame['BMI'].between(10, 50)]
)

# Step 3: Exploratory Data Analysis (EDA)
# Summary statistics
print("\nSummary Statistics:")
print(df.describe())

# Correlation matrix
# Patient_ID is an arbitrary sequential row label, not a measurement, so it is
# excluded here; correlating against it only adds a meaningless row/column.
corr = df.drop(columns='Patient_ID').corr()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Matrix')
# Re-fit the axes so the long column-name tick labels stay inside the canvas.
fig.tight_layout()
fig.savefig('correlation_matrix.png')
plt.close(fig)  # figure is only written to disk, not shown inline

# Distribution of Age
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['Age'], bins=20, kde=True, ax=ax)
ax.set(title='Age Distribution', xlabel='Age', ylabel='Count')
fig.savefig('age_distribution.png')
plt.close(fig)

# BMI vs. Diabetes
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(x='Diabetes', y='BMI', data=df, ax=ax)
ax.set_title('BMI vs. Diabetes Status')
fig.savefig('bmi_vs_diabetes.png')
plt.close(fig)


# Step 4: Predictive Modeling
# Predict Diabetes based on other features
# NOTE: in this notebook the Diabetes column is generated independently of the
# features, so this section demonstrates the workflow; scores close to chance
# are the expected outcome.
X = df[['Age', 'Gender', 'BMI', 'Blood_Pressure', 'Cholesterol', 'Smoking']]
y = df['Diabetes']

# Split the data
# stratify=y keeps the diabetic / non-diabetic ratio the same in both parts;
# with a test set this small an unstratified split can be badly lopsided.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion Matrix
# labels=[0, 1] pins the matrix to 2x2 (rows/columns ordered No, Yes) even if
# one class happens to be absent from y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
class_names = ['No', 'Yes']  # display names for the encoded labels 0 and 1
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm, annot=True, fmt='d', cmap='Blues',
    xticklabels=class_names, yticklabels=class_names, ax=ax,
)
ax.set(title='Confusion Matrix', xlabel='Predicted', ylabel='Actual')
fig.savefig('confusion_matrix.png')
plt.close(fig)

# Feature Importance
# Pair each feature name with the fitted forest's importance score and put the
# most important feature first so it is drawn at the top of the bar chart.
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Importance': model.feature_importances_
}).sort_values('Importance', ascending=False)

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x='Importance', y='Feature', data=feature_importance, ax=ax)
ax.set_title('Feature Importance for Diabetes Prediction')
fig.savefig('feature_importance.png')
plt.close(fig)

# Step 5: Save the cleaned dataset
# Written without the index so the CSV holds only the data columns.
df.to_csv('cleaned_health_data.csv', index=False)
print("\nCleaned dataset saved as 'cleaned_health_data.csv'")

# Step 6: Summary of Findings
# Collect the bullet points first, then print them one per line.
summary_lines = [
    "- The dataset contains {} patients with features like Age, BMI, and Diabetes status.".format(len(df)),
    "- Key visualizations include Age distribution, BMI vs. Diabetes, and a correlation matrix.",
    "- A Random Forest model was trained to predict Diabetes with the given features.",
    "- Check saved plots: correlation_matrix.png, age_distribution.png, bmi_vs_diabetes.png, confusion_matrix.png, feature_importance.png",
]
print("\nSummary of Findings:")
for line in summary_lines:
    print(line)

First 5 rows of the dataset:
   Patient_ID  Age  Gender        BMI  Blood_Pressure  Cholesterol Smoking  \
0           1   56  Female  30.610425             125          293      No   
1           2   69  Female  33.453596             127          291      No   
2           3   46  Female  21.495626             173          264      No   
3           4   32  Female  31.494974             178          292     Yes   
4           5   60    Male  30.106583             114          241      No   

  Diabetes  
0       No  
1      Yes  
2      Yes  
3       No  
4       No  

Missing Values:
Patient_ID        0
Age               0
Gender            0
BMI               0
Blood_Pressure    0
Cholesterol       0
Smoking           0
Diabetes          0
dtype: int64

Summary Statistics:
       Patient_ID         Age      Gender         BMI  Blood_Pressure  \
count  100.000000  100.000000  100.000000  100.000000      100.000000   
mean    50.500000   50.270000    0.580000   29.515449      130.5800

**EEG Calculator**

In [4]:
# EEG averaging cell: takes two amplitude readings and reports their mean.
eeg1 = 10.0  # Your EEG amplitude 1 in μV
eeg2 = 15.0  # Your EEG amplitude 2 in μV
avg_eeg = sum((eeg1, eeg2)) / 2  # arithmetic mean of the two readings

print("Calculating EEG Average for AI/ML/BCI, for Christ’s Glory!")
print("Patient: Dr. Lordsfavour Anukam")
# Each reading, then the average, is printed as "<label> <value>".
for label, value in (
    ("EEG Amplitude 1 (μV):", eeg1),
    ("EEG Amplitude 2 (μV):", eeg2),
    ("Average EEG Amplitude (μV):", avg_eeg),
):
    print(label, value)
print("Healing for Jesus!")



Calculating EEG Average for AI/ML/BCI, for Christ’s Glory!
Patient: Dr. Lordsfavour Anukam
EEG Amplitude 1 (μV): 10.0
EEG Amplitude 2 (μV): 15.0
Average EEG Amplitude (μV): 12.5
Healing for Jesus!
