In [33]:
import pandas as pd

# Load the CSV file into a DataFrame
df = pd.read_csv('kids_data.csv')

# Display the initial data
print("Initial Data:")
print(df)

# Remove rows with NaN values
df_cleaned = df.dropna().reset_index(drop=True)

# Display cleaned data
print("\nData after removing rows with NaN values:")
print(df_cleaned)

# Accessing specific data
print("\nAccessing specific data:")
print("Name of the first person:", df_cleaned.loc[0, 'Name'])
print("Age of the second person:", df_cleaned.loc[1, 'Age'])

# Filtering data
print("\nFiltering data where Age is greater than 10:")
filtered_data = df_cleaned[df_cleaned['Age'] > 10]
print(filtered_data)

# Exporting cleaned data to a new CSV file
df_cleaned.to_csv('cleaned_kids_data.csv', index=False)
print("\nCleaned data exported to 'cleaned_kids_data.csv'.")


Initial Data:
       Name   Age           City Favorite_Color School_Grade  English_Marks  \
0     Alice  12.0       New York           Blue            A           85.0   
1       Bob   NaN    Los Angeles          Green            B           78.0   
2   Charlie  10.0        Chicago            Red            A           92.0   
3     David  11.0            NaN         Yellow            A           80.0   
4     Emily  12.0          Miami           Blue            B            NaN   
5     Frank  11.0         Dallas          Green            C           75.0   
6     Grace  10.0        Seattle            NaN            A           85.0   
7     Henry  13.0         Boston           Blue            B           88.0   
8    Isabel  11.0       Portland            Red            C           72.0   
9      Jack  12.0        Houston          Green            A           90.0   
10     Kate   NaN  San Francisco         Yellow            B           85.0   
11     Liam  10.0         Denver      

# Filling with the mean / avg values 

In [32]:
## if a row have nan values for some subjects we can fill it with the average value of that column 

import pandas as pd

# Load the CSV file into a DataFrame
df = pd.read_csv('kids_data.csv')


df['Maths_Marks'].fillna(df['Maths_Marks'].mean(), inplace= True)
print(df['Maths_Marks'].to_string())


# Display the initial data
print("Initial Data:")
print(df)

# Fill NaN values with the average marks of each subject
subjects = ['English_Marks', 'Maths_Marks', 'Science_Marks', 'History_Marks']
for subject in subjects:
    average_marks = df[subject].mean()  # Calculate the mean of each subject's marks
    df[subject].fillna(average_marks, inplace=True)  # Fill NaN values with the calculated mean

# Display data after filling NaN values
print("\nData after filling NaN values with average marks:")
print(df)

# Export updated data to a new CSV file
df.to_csv('updated_kids_data.csv', index=False)
print("\nUpdated data exported to 'updated_kids_data.csv'.")


0        90.0
1     55555.0
2        85.0
3        82.0
4        88.0
5        70.0
6        90.0
7        85.0
8     55555.0
9        92.0
10       80.0
11       88.0
12       80.0
13       75.0
14       85.0
15       90.0
16       75.0
17       92.0
18       88.0
19       78.0
Initial Data:
       Name   Age           City Favorite_Color School_Grade  English_Marks  \
0     Alice  12.0       New York           Blue            A           85.0   
1       Bob   NaN    Los Angeles          Green            B           78.0   
2   Charlie  10.0        Chicago            Red            A           92.0   
3     David  11.0            NaN         Yellow            A           80.0   
4     Emily  12.0          Miami           Blue            B            NaN   
5     Frank  11.0         Dallas          Green            C           75.0   
6     Grace  10.0        Seattle            NaN            A           85.0   
7     Henry  13.0         Boston           Blue            B           88.

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Maths_Marks'].fillna(55555, inplace= True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[subject].fillna(average_marks, inplace=True)  # Fill NaN values with the calculated mean


In [28]:
print(df)

       Name   Age           City Favorite_Color School_Grade  English_Marks  \
0     Alice  12.0       New York           Blue            A      85.000000   
1       Bob   NaN    Los Angeles          Green            B      78.000000   
2   Charlie  10.0        Chicago            Red            A      92.000000   
3     David  11.0            NaN         Yellow            A      80.000000   
4     Emily  12.0          Miami           Blue            B      82.941176   
5     Frank  11.0         Dallas          Green            C      75.000000   
6     Grace  10.0        Seattle            NaN            A      85.000000   
7     Henry  13.0         Boston           Blue            B      88.000000   
8    Isabel  11.0       Portland            Red            C      72.000000   
9      Jack  12.0        Houston          Green            A      90.000000   
10     Kate   NaN  San Francisco         Yellow            B      85.000000   
11     Liam  10.0         Denver           Blue     