In [1]:
import pandas as pd
import numpy as np

data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'age': [25, None, 35, 40,None],
    'Salary': [50000, 60000, None, 80000, None],
}

df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

df.isnull().sum()
df.dropna()

df['age'].fillna(df['age'].mean(),inplace=True)
df['Salary'].fillna(df['Salary'].mean(),inplace=True)

df


Original DataFrame:
      name   age   Salary
0    Alice  25.0  50000.0
1      Bob   NaN  60000.0
2  Charlie  35.0      NaN
3    David  40.0  80000.0
4      Eve   NaN      NaN


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].mean(),inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Salary'].fillna(df['Salary'].mean(),inplace=True)


Unnamed: 0,name,age,Salary
0,Alice,25.0,50000.0
1,Bob,33.333333,60000.0
2,Charlie,35.0,63333.333333
3,David,40.0,80000.0
4,Eve,33.333333,63333.333333


In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

df = pd.read_excel('Book1.xlsx')

df_label_encoded = df.copy()
le = LabelEncoder()

df_label_encoded['Gender_Encoded'] = le.fit_transform(df_label_encoded['Gender'])
df_label_encoded['Passed_Encoded'] = le.fit_transform(df_label_encoded['Passed'])


df_encoded = pd.get_dummies(df_label_encoded, columns=['City'])

bool_cols = ['City_Banglore', 'City_Dehli', 'City_Mumbai']
df_encoded[bool_cols] = df_encoded[bool_cols].astype(int)

print("DataFrame after handling missing values:")
print(df_encoded)

DataFrame after handling missing values:
     Name  Gender Passed  Gender_Encoded  Passed_Encoded  City_Banglore  \
0    Aman    Male    Yes               1               1              0   
1   Priya  Female    Yes               0               1              0   
2   Rahul    Male     No               1               0              1   
3  Anjali  Female    Yes               0               1              0   
4    Ravi    Male     No               1               0              0   

   City_Dehli  City_Mumbai  
0           1            0  
1           0            1  
2           0            0  
3           0            1  
4           1            0  


In [3]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform()

scaler = MinMaxScaler()
x_scaled = scaler.fit_transform()



TypeError: TransformerMixin.fit_transform() missing 1 required positional argument: 'X'

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

data = {
    'Study_hours': [1, 2, 3, 5, 5],
    'Exam_score': [40, 50, 60, 70, 80]
}

df = pd.DataFrame(data)

StandardScaler= StandardScaler()
StandardScaled = StandardScaler.fit_transform(df)
print("Standard Scaled Output")
print(pd.DataFrame(StandardScaled, columns=df.columns))

MinMaxScaler = MinMaxScaler()
MinMaxScaled = MinMaxScaler.fit_transform(df)
print("MinMax Scaled Output")
print(pd.DataFrame(MinMaxScaled, columns=df.columns))

X = df[['Study_hours']]
y = df[['Exam_score']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Train set:")
print(X_train, y_train)
print("Test set:")
print(X_test, y_test)



Standard Scaled Output
   Study_hours  Exam_score
0       -1.375   -1.414214
1       -0.750   -0.707107
2       -0.125    0.000000
3        1.125    0.707107
4        1.125    1.414214
MinMax Scaled Output
   Study_hours  Exam_score
0         0.00        0.00
1         0.25        0.25
2         0.50        0.50
3         1.00        0.75
4         1.00        1.00
Train set:
   Study_hours
4            5
2            3
0            1
3            5    Exam_score
4          80
2          60
0          40
3          70
Test set:
   Study_hours
1            2    Exam_score
1          50


In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# If you're using df_encoded, make sure to assign it
df = df_encoded  # use the encoded DataFrame

# Strip extra spaces just in case
df.columns = df.columns.str.strip() 

# Columns you want to scale
cols_to_scale = ['Gender_Encoded', 'Passed_Encoded', 'City_Banglore', 'City_Dehli', 'City_Mumbai']

# Standard Scaling
scaler_standard = StandardScaler()
standard_scaled = scaler_standard.fit_transform(df[cols_to_scale])
df_standard = pd.DataFrame(standard_scaled, columns=[col + '_std' for col in cols_to_scale])

# MinMax Scaling
scaler_minmax = MinMaxScaler()
minmax_scaled = scaler_minmax.fit_transform(df[cols_to_scale])
df_minmax = pd.DataFrame(minmax_scaled, columns=[col + '_minmax' for col in cols_to_scale])

# Combine original + scaled
df_final = pd.concat([df, df_standard, df_minmax], axis=1)

# Show result
print(df_final)









     Name  Gender Passed  Gender_Encoded  Passed_Encoded  City_Banglore  \
0    Aman    Male    Yes               1               1              0   
1   Priya  Female    Yes               0               1              0   
2   Rahul    Male     No               1               0              1   
3  Anjali  Female    Yes               0               1              0   
4    Ravi    Male     No               1               0              0   

   City_Dehli  City_Mumbai  Gender_Encoded_std  Passed_Encoded_std  \
0           1            0            0.816497            0.816497   
1           0            1           -1.224745            0.816497   
2           0            0            0.816497           -1.224745   
3           0            1           -1.224745            0.816497   
4           1            0            0.816497           -1.224745   

   City_Banglore_std  City_Dehli_std  City_Mumbai_std  Gender_Encoded_minmax  \
0               -0.5        1.224745        -0.8

In [None]:
scaler = StandardScaler()
scaled_standard = scaler.fit_transform(df[cols_to_scale])

df_standard = pd.DataFrame(scaled_standard, columns=cols_to_scale)
print("Standard Scaled Data:")
print(df_standard)


KeyError: "None of [Index(['Gender_Encoded', 'Passed_Encoded', 'City_Banglore', 'City_Dehli',\n       'City_Mumbai'],\n      dtype='object')] are in the [columns]"

In [None]:
import sys
print(sys.executable)


c:\Users\VaMp!r3\AppData\Local\Programs\Python\Python313\python.exe


In [None]:
from sklearn.preprocessing import StandardScaler
print("Scikit-learn is working!")



Scikit-learn is working!
