In [1]:
#Order Significance:

#Ordinal Encoding: Assumes a meaningful order or hierarchy among categories and assigns numerical values based on this order.

#Label Encoding: Does not assume any specific order; it merely assigns numerical values to different categories.

#Applicability:
#Ordinal Encoding: Suitable for categorical variables with a clear ordinal relationship, where the order of categories matters (e.g., low, medium, high).

#Label Encoding: More general and applicable to both ordinal and nominal categorical variables.

##Example:
#Scenario: Education Levels

#Ordinal Encoding:
#Categories: High School, Associate's Degree, Bachelor's Degree, Master's Degree, Ph.D.
#Ordinal Encoding: 1, 2, 3, 4, 5

#Label Encoding:
#Categories: High School, Associate's Degree, Bachelor's Degree, Master's Degree, Ph.D.
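#Label Encoding: 2, 0, 1, 3, 4 (arbitrary integer IDs; scikit-learn's LabelEncoder, for example, numbers the categories alphabetically starting at 0, so the values carry no rank)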


In [3]:
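##@ Explain how Target Guided Ordinal Encoding works and provide an example.
#Target Guided Ordinal Encoding derives the order of the categories from the target variable: compute the mean of the target for each category, sort the categories by that mean, and replace each category with its rank in that ordering.
#In summary, Target Guided Ordinal Encoding is beneficial when the categories can be meaningfully ranked by their relationship with the target variable, making it a useful technique for encoding categorical variables in certain machine learning projects.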
import pandas as pd
import numpy as np

# Target-guided ordinal encoding on a toy churn table: every education level
# is replaced by the rank of its mean churn rate (0 = lowest mean churn).
data = {'Education Level': ['High School', 'Bachelor', 'Master', 'Ph.D.', 'Bachelor', 'High School'],
        'Churn': [1, 0, 1, 0, 1, 0]}

df = pd.DataFrame(data)

# Mean of the target per category, lowest first. 'Bachelor' and 'High School'
# tie at 0.5 in this sample, so a stable sort is requested: tied categories
# then keep the alphabetical order produced by groupby instead of depending
# on the default quicksort, which does not guarantee the order of ties.
education_mean_churn = (
    df.groupby('Education Level')['Churn']
    .mean()
    .sort_values(kind='mergesort')
)

# Position in the sorted index becomes the encoded value.
ordinal_mapping = {edu: i for i, edu in enumerate(education_mean_churn.index)}

df['Education Level Encoded'] = df['Education Level'].map(ordinal_mapping)

print(df)


  Education Level  Churn  Education Level Encoded
0     High School      1                        2
1        Bachelor      0                        1
2          Master      1                        3
3           Ph.D.      0                        0
4        Bachelor      1                        1
5     High School      0                        2


In [4]:
##Define covariance and explain why it is important in statistical analysis.

# Covariance is a statistical measure that describes the extent to which two variables change together
#Importance in Statistical Analysis:

#Direction of Relationship:

#Positive Covariance: Indicates that as one variable increases, the other variable tends to increase as well.
#Negative Covariance: Suggests that as one variable increases, the other variable tends to decrease.

#Strength of Relationship:

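#The magnitude of covariance depends on the units and scale of the variables, so by itself it is not a reliable measure of how strong the linear relationship is. To compare strength, standardize it into the correlation coefficient (covariance divided by the product of the two standard deviations), which always lies between -1 and 1.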

#Independence:

#If the covariance is close to zero, it suggests that the variables are not strongly related. However, it does not imply independence, as variables can be dependent in nonlinear ways.

#Used in Linear Regression:

#Covariance is a key component in the calculation of the coefficients in linear regression models.

In [1]:
#@For a dataset with the following categorical variables: Color (red, green, blue), Size (small, medium,large), and Material (wood, metal, plastic), perform label encoding using Python's scikit-learn library. 

from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Toy frame with three categorical columns, one distinct value per row
data = {
    'Color': ['red', 'green', 'blue'],
    'Size': ['small', 'medium', 'large'],
    'Material': ['wood', 'metal', 'plastic'],
}
df = pd.DataFrame(data)

# A dedicated encoder per column, so each one is fitted only on its own values
label_encoder_color = LabelEncoder()
label_encoder_size = LabelEncoder()
label_encoder_material = LabelEncoder()

column_encoders = {
    'Color': label_encoder_color,
    'Size': label_encoder_size,
    'Material': label_encoder_material,
}

# Fit each encoder on its column and store the integer codes next to the
# original values as '<column>_encoded'
for column_name, encoder in column_encoders.items():
    df[column_name + '_encoded'] = encoder.fit_transform(df[column_name])

print(df)


   Color    Size Material  Color_encoded  Size_encoded  Material_encoded
0    red   small     wood              2             2                 2
1  green  medium    metal              1             1                 0
2   blue   large  plastic              0             0                 1


In [2]:
#@ Calculate the covariance matrix for the following variables in a dataset: Age, Income, and Education level.

    
import pandas as pd
import numpy as np

# Five sample records; every column rises in equal steps from row to row
data = {
    'Age': [25, 30, 35, 40, 45],
    'Income': [50000, 60000, 70000, 80000, 90000],
    'Education_Level': [12, 14, 16, 18, 20],
}

df = pd.DataFrame(data)

# Pairwise covariance of all numeric columns; the diagonal holds each
# column's variance. Leaving the expression last lets the notebook render it.
covariance_matrix = df.cov()
covariance_matrix


Unnamed: 0,Age,Income,Education_Level
Age,62.5,125000.0,25.0
Income,125000.0,250000000.0,50000.0
Education_Level,25.0,50000.0,10.0


In [4]:
#@working on a machine learning project with a dataset containing several categorical
#variables, including "Gender" (Male/Female), "Education Level" (High School/Bachelor's/Master's/PhD),
#and "Employment Status" (Unemployed/Part-Time/Full-Time).

import pandas as pd
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

# Toy employee table with three categorical columns, each encoded with the
# scheme that fits its nature: binary, ordinal, and one-hot.
data = {'Gender': ['Male', 'Female', 'Male', 'Female', 'Male'],
    'Education Level': ['High School', 'Bachelor', 'Master', 'PhD', 'Bachelor'],
    'Employment Status': ['Unemployed', 'Part-Time', 'Full-Time', 'Part-Time', 'Full-Time']}

df = pd.DataFrame(data)

# Binary encoding for Gender (a value missing from the mapping would become NaN)
df['Gender_encoded'] = df['Gender'].map({'Male': 0, 'Female': 1})

# Ordinal encoding for Education Level: the explicit category list fixes the
# rank, so 'High School' -> 0 ... 'PhD' -> 3 regardless of alphabetical order
education_encoder = OrdinalEncoder(categories=[['High School', 'Bachelor', 'Master', 'PhD']])
df['Education_Level_encoded'] = education_encoder.fit_transform(df[['Education Level']])

# One-hot encoding for Employment Status (no inherent order between statuses).
# dtype=int keeps the indicator columns as 0/1 on every pandas version;
# without it pandas 2.x returns boolean True/False columns.
employment_status_encoded = pd.get_dummies(
    df['Employment Status'],
    prefix='Employment_Status',
    dtype=int,
)
df = pd.concat([df, employment_status_encoded], axis=1)

print(df)


   Gender Education Level Employment Status  Gender_encoded  \
0    Male     High School        Unemployed               0   
1  Female        Bachelor         Part-Time               1   
2    Male          Master         Full-Time               0   
3  Female             PhD         Part-Time               1   
4    Male        Bachelor         Full-Time               0   

   Education_Level_encoded  Employment_Status_Full-Time  \
0                      0.0                            0   
1                      1.0                            0   
2                      2.0                            1   
3                      3.0                            0   
4                      1.0                            1   

   Employment_Status_Part-Time  Employment_Status_Unemployed  
0                            0                             1  
1                            1                             0  
2                            0                             0  
3             

In [None]:
#@You are analyzing a dataset with two continuous variables, "Temperature" and "Humidity", and two
#categorical variables, "Weather Condition" (Sunny/Cloudy/Rainy) and "Wind Direction" (North/South/
#East/West). Calculate the covariance between each pair of variables and interpret the results.

data = {
    'Temperature': [25.0, 28.0, 22.0, 26.0, 30.0],
    'Humidity': [60, 65, 70, 55, 75],
    'Weather Condition': ['Sunny', 'Cloudy', 'Rainy', 'Sunny', 'Cloudy'],
    'Wind Direction': ['North', 'South', 'East', 'West', 'North']
}

df2=pd.DataFrame(data)

df2.co