# In [None]:
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from scipy.stats import pearsonr

### Q1: Pearson Correlation Coefficient - Study Time vs Exam Scores
# Perfectly linear toy data: every extra hour of study adds five marks.
study_time = list(range(2, 12))
exam_scores = list(range(50, 100, 5))
# pearsonr returns (coefficient, p-value); only the coefficient is reported.
pearson_corr = pearsonr(study_time, exam_scores)[0]
print("Pearson Correlation between Study Time and Exam Scores:", pearson_corr)

### Q2: Spearman's Rank Correlation - Sleep vs Job Satisfaction
# One (hours of sleep, job satisfaction) pair per observation.
_paired_observations = [
    (4, 3), (5, 4), (6, 6), (7, 7), (8, 8),
    (9, 9), (10, 10), (4, 2), (6, 5), (8, 7),
]
sleep_hours, job_satisfaction = (list(column) for column in zip(*_paired_observations))
# spearmanr returns (coefficient, p-value); the p-value is not needed here.
spearman_corr = spearmanr(sleep_hours, job_satisfaction)[0]
print("Spearman Correlation between Sleep and Job Satisfaction:", spearman_corr)

### Q3: Pearson & Spearman Correlation - Exercise vs BMI
# Fix the global NumPy seed so the synthetic sample is reproducible.
np.random.seed(42)
# Integer exercise hours in [1, 10) and uniformly distributed BMI in [18, 30).
exercise_hours = np.random.randint(low=1, high=10, size=50)
bmi = np.random.uniform(low=18, high=30, size=50)

# Both functions return (coefficient, p-value); keep only the coefficient.
pearson_corr_ex_bmi = pearsonr(exercise_hours, bmi)[0]
spearman_corr_ex_bmi = spearmanr(exercise_hours, bmi)[0]

print("Pearson Correlation between Exercise Hours and BMI:", pearson_corr_ex_bmi)
print("Spearman Correlation between Exercise Hours and BMI:", spearman_corr_ex_bmi)

### Q4: Pearson Correlation - TV Watching vs Physical Activity
# Reseed so this section produces the same sample on every run.
np.random.seed(42)
# Two independent draws of 50 integers in [1, 10); the draw order matters
# because both come from the same seeded global generator.
tv_hours = np.random.randint(low=1, high=10, size=50)
physical_activity = np.random.randint(low=1, high=10, size=50)
# Keep only the coefficient from the (coefficient, p-value) result.
pearson_corr_tv_activity = pearsonr(tv_hours, physical_activity)[0]
print("Pearson Correlation between TV Watching and Physical Activity:", pearson_corr_tv_activity)

### Q5: Difference between Ordinal Encoding and Label Encoding
# Ordinal Encoding is used for categorical variables with a meaningful order (e.g., Low < Medium < High)
# Label Encoding assigns integer codes without regard to any order (scikit-learn's LabelEncoder numbers the sorted unique values), so it suits categories with no inherent ranking

# Example for Ordinal Encoding (Ordered Categories)
# The explicit category list fixes the ranking: Small -> 0, Medium -> 1, Large -> 2.
size_order = ['Small', 'Medium', 'Large']
# OrdinalEncoder expects 2-D input, so each sample is a one-element row.
size_data = [[label] for label in ("Small", "Medium", "Large")]
ordinal_encoder = OrdinalEncoder(categories=[size_order])
size_encoded = ordinal_encoder.fit(size_data).transform(size_data)
print("Ordinal Encoding Output:\n", size_encoded)

# Example for Label Encoding (Unordered Categories)
colors = ['Red', 'Green', 'Blue', 'Red', 'Blue']
# Fit first, then transform the same values; equivalent to fit_transform.
le = LabelEncoder().fit(colors)
color_encoded = le.transform(colors)
print("\nLabel Encoding Output:\n", color_encoded)

### Q6: Target Guided Ordinal Encoding
# It orders categories based on the mean of the target variable
# Example: Encoding "City" based on average house prices
house_data = pd.DataFrame(
    {
        "City": list("ABCABC"),
        "Price": [200, 250, 300, 220, 270, 320],
    }
)
# Mean target value per category, sorted ascending so the cheapest city comes first.
_mean_price_by_city = house_data.groupby("City")['Price'].mean()
avg_price = _mean_price_by_city.sort_values()
# Rank of each city in that ordering becomes its encoded value (0 = lowest mean price).
target_ordinal_mapping = dict(zip(avg_price.index, range(len(avg_price))))
house_data['City_Encoded'] = house_data['City'].map(target_ordinal_mapping)
print("\nTarget Guided Ordinal Encoding:\n", house_data)

### Q7: Covariance Definition
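# Covariance measures how two variables vary together: it is positive when they tend to increase together and negative when one tends to increase as the other decreases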
# Formula: Cov(X, Y) = E[(X - E[X]) * (Y - E[Y])]

def covariance(x, y, *, bias=True):
    """Return the covariance between two equal-length 1-D sequences.

    Args:
        x: First sequence of numeric observations.
        y: Second sequence of numeric observations, same length as ``x``.
        bias: When True (the default) normalise by N, i.e. the population
            covariance E[(X - E[X]) * (Y - E[Y])]. When False normalise by
            N - 1, the sample covariance that ``pandas.DataFrame.cov``
            reports by default.

    Returns:
        The off-diagonal entry of the 2x2 covariance matrix from ``np.cov``.
    """
    # np.cov returns [[var(x), cov(x, y)], [cov(y, x), var(y)]].
    return np.cov(x, y, bias=bias)[0, 1]


x = [10, 20, 30, 40, 50]
y = [5, 10, 15, 20, 25]
print("\nCovariance between x and y:", covariance(x, y))

### Q8: Label Encoding with scikit-learn
data = pd.DataFrame({
    "Color": ["Red", "Green", "Blue", "Green", "Red"],
    "Size": ["Small", "Medium", "Large", "Small", "Large"],
    "Material": ["Wood", "Metal", "Plastic", "Metal", "Wood"]
})
# Keep one fitted encoder per column. Re-fitting a single shared LabelEncoder
# overwrites its classes_ on every iteration, so afterwards only the last
# column could be decoded with inverse_transform.
label_encoders = {}
for col in data.columns:
    le = LabelEncoder()
    # Codes follow the sorted order of the column's unique values, so they
    # carry no ranking (e.g. "Size" is not encoded as Small < Medium < Large).
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le
print("\nLabel Encoded Data:\n", data)

### Q9: Covariance Matrix for Age, Income, Education Level
# Five evenly spaced observations per variable.
sample_data = pd.DataFrame(
    {
        "Age": list(range(25, 50, 5)),
        "Income": list(range(40000, 90000, 10000)),
        "Education_Level": list(range(1, 6)),  # 1: High School, 5: PhD
    }
)
# Pairwise covariance between every pair of columns.
cov_matrix = sample_data.cov()
print("\nCovariance Matrix:\n", cov_matrix)

### Q10: Encoding Choices for Different Variables
# Gender: Label Encoding (Binary: 0 or 1)
# Education Level: Ordinal Encoding (Ordered: High School < Bachelor's < Master's < PhD)
# Employment Status: One-Hot Encoding (Unordered Categories)

# Lowest to highest qualification; the position in this list is the encoded value.
education_order = ["High School", "Bachelor's", "Master's", "PhD"]
# OrdinalEncoder expects 2-D input, so wrap each level in its own row.
education_levels = [[level] for level in education_order]
education_encoder = OrdinalEncoder(categories=[education_order])
education_encoded = education_encoder.fit(education_levels).transform(education_levels)
print("\nOrdinal Encoding for Education Level:\n", education_encoded)

### Q11: Covariance Between Temperature, Humidity, Weather Condition, and Wind Direction
weather_data = pd.DataFrame(
    {
        "Temperature": [30, 25, 28, 35, 20],
        "Humidity": [60, 65, 70, 55, 80],
        "Weather_Condition": ["Sunny", "Cloudy", "Rainy", "Sunny", "Cloudy"],
        "Wind_Direction": ["North", "South", "East", "West", "North"],
    }
)
# The text columns are one-hot encoded before computing covariance;
# drop_first removes one indicator per variable to avoid a redundant column.
categorical_columns = ["Weather_Condition", "Wind_Direction"]
weather_encoded = pd.get_dummies(
    weather_data,
    columns=categorical_columns,
    drop_first=True,
)
cov_matrix_weather = weather_encoded.cov()
print("\nCovariance Matrix for Weather Data:\n", cov_matrix_weather)
