In [1]:
import pandas as pd
import numpy as np

In [2]:
data = {
    "name": ["Alice", "Bob", "Charlie", "David"],
    "age": [25, 30, 35, 28],
    "score":[85, 90, 88, 92]
}

df = pd.DataFrame(data)
print(df.head())

      name  age  score
0    Alice   25     85
1      Bob   30     90
2  Charlie   35     88
3    David   28     92


In [3]:
filtered_df = df[df["age"] > 28]

print("Filtered (age > 28): ")
print(filtered_df)

Filtered (age > 28): 
      name  age  score
1      Bob   30     90
2  Charlie   35     88


In [6]:
df.loc[1, "age"] = np.nan
df.loc[3, "score"] = np.nan

print("With missing values: ")
print(df)

With missing values: 
      name   age  score  socre
0    Alice  25.0   85.0    NaN
1      Bob   NaN   90.0    NaN
2  Charlie  35.0   88.0    NaN
3    David  28.0    NaN    NaN


In [5]:
print("Missing values count: ")
print(df.isnull().sum())

Missing values count: 
name     0
age      1
score    0
socre    4
dtype: int64


In [13]:
df = df.drop(columns=["socre"])
print(df)

      name   age  score
0    Alice  25.0   85.0
1      Bob   NaN   90.0
2  Charlie  35.0   88.0
3    David  28.0    NaN


In [14]:
df["age"].fillna(df["age"].mean(),inplace=True)
df["score"].fillna(df["score"].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["age"].fillna(df["age"].mean(),inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["score"].fillna(df["score"].mean(), inplace=True)


In [15]:
print("After filling missing values: ")
print(df)

After filling missing values: 
      name        age      score
0    Alice  25.000000  85.000000
1      Bob  29.333333  90.000000
2  Charlie  35.000000  88.000000
3    David  28.000000  87.666667


In [17]:
df["passed"] = df["score"] >= 85

# features and target
X = df[["age", "score"]]
y = df[["passed"]]

print("Features (X):")
print(X)

Features (X):
         age      score
0  25.000000  85.000000
1  29.333333  90.000000
2  35.000000  88.000000
3  28.000000  87.666667


In [18]:
print("Target(y):")
print(y)

Target(y):
   passed
0    True
1    True
2    True
3    True
