In [9]:
import pandas as pd

# Load the raw fruit-price table; every later cell derives from `df`.
df = pd.read_csv("fruits_data.csv")

# Report the basic structure first: (rows, columns), then the column labels.
for label, value in (
    ("Shape of dataframe:", df.shape),
    ("Columns in dataframe:", df.columns),
):
    print(label, value)

# Finally, dump the full table.
print(df)

Shape of dataframe: (10, 6)
Columns in dataframe: Index(['date', 'apple(1kg)', 'banana(1 dozen)', 'grapes(1kg)', 'mango(1kg)',
       'Water Melons(1)'],
      dtype='object')
         date  apple(1kg)  banana(1 dozen)  grapes(1kg)  mango(1kg)  \
0  01-01-2022       230.0             46.0         65.0       184.0   
1  01-02-2022       237.0             50.0         72.0       173.0   
2  01-03-2022       259.0              NaN          NaN         NaN   
3  01-04-2022         NaN             41.0         75.0       175.0   
4  01-05-2022       240.0             48.0          NaN       185.0   
5  01-06-2022       234.0             45.0         66.0       194.0   
6  01-07-2022         NaN              NaN         70.0       174.0   
7  01-08-2022       236.0             46.0         66.0       187.0   
8  01-09-2022       232.0             41.0         71.0         NaN   
9  01-10-2022       240.0             48.0          NaN       198.0   

   Water Melons(1)  
0              NaN  
1             72.0  
2             93.0  
3             74.0  
4             84.0  
5             93.0  
6              NaN  
7             86.0  
8             93.0  
9             97.0  


In [10]:
# Replace every missing entry with a sentinel that cannot be a real price.
missing_marker = -1
new_df = df.fillna(value=missing_marker)

# Display the filled table.
print(new_df)


         date  apple(1kg)  banana(1 dozen)  grapes(1kg)  mango(1kg)  \
0  01-01-2022       230.0             46.0         65.0       184.0   
1  01-02-2022       237.0             50.0         72.0       173.0   
2  01-03-2022       259.0             -1.0         -1.0        -1.0   
3  01-04-2022        -1.0             41.0         75.0       175.0   
4  01-05-2022       240.0             48.0         -1.0       185.0   
5  01-06-2022       234.0             45.0         66.0       194.0   
6  01-07-2022        -1.0             -1.0         70.0       174.0   
7  01-08-2022       236.0             46.0         66.0       187.0   
8  01-09-2022       232.0             41.0         71.0        -1.0   
9  01-10-2022       240.0             48.0         -1.0       198.0   

   Water Melons(1)  
0             -1.0  
1             72.0  
2             93.0  
3             74.0  
4             84.0  
5             93.0  
6             -1.0  
7             86.0  
8             93.0  
9             97.0  

In [11]:
# Column-wise imputation: each column gets its own replacement value.
# mean()/median() skip NaN by default, so the statistics come from the
# observed prices only.
fill_values = {
    'apple(1kg)': df['apple(1kg)'].mean(),            # mean
    'banana(1 dozen)': df['banana(1 dozen)'].mean(),  # mean
    'grapes(1kg)': df['grapes(1kg)'].median(),        # median
    'mango(1kg)': df['mango(1kg)'].median(),          # median
    # A text placeholder: the column then holds both numbers and strings.
    'Water Melons(1)': "Not Available",
}

# Work on a copy so the raw frame stays untouched for the following cells.
new_df = df.copy()
for column, replacement in fill_values.items():
    new_df[column] = new_df[column].fillna(replacement)

# Display the imputed table.
print(new_df)


         date  apple(1kg)  banana(1 dozen)  grapes(1kg)  mango(1kg)  \
0  01-01-2022       230.0           46.000         65.0       184.0   
1  01-02-2022       237.0           50.000         72.0       173.0   
2  01-03-2022       259.0           45.625         70.0       184.5   
3  01-04-2022       238.5           41.000         75.0       175.0   
4  01-05-2022       240.0           48.000         70.0       185.0   
5  01-06-2022       234.0           45.000         66.0       194.0   
6  01-07-2022       238.5           45.625         70.0       174.0   
7  01-08-2022       236.0           46.000         66.0       187.0   
8  01-09-2022       232.0           41.000         71.0       184.5   
9  01-10-2022       240.0           48.000         70.0       198.0   

  Water Melons(1)  
0   Not Available  
1            72.0  
2            93.0  
3            74.0  
4            84.0  
5            93.0  
6   Not Available  
7            86.0  
8            93.0  
9            97.0 

In [12]:
# Forward fill: propagate the last valid observation in each column downward.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling,
# which emits a FutureWarning on current pandas.
# A missing value in the very first row has nothing above it to copy from,
# so it stays NaN.
new_df = df.ffill()

# Show the dataframe
print(new_df)


         date  apple(1kg)  banana(1 dozen)  grapes(1kg)  mango(1kg)  \
0  01-01-2022       230.0             46.0         65.0       184.0   
1  01-02-2022       237.0             50.0         72.0       173.0   
2  01-03-2022       259.0             50.0         72.0       173.0   
3  01-04-2022       259.0             41.0         75.0       175.0   
4  01-05-2022       240.0             48.0         75.0       185.0   
5  01-06-2022       234.0             45.0         66.0       194.0   
6  01-07-2022       234.0             45.0         70.0       174.0   
7  01-08-2022       236.0             46.0         66.0       187.0   
8  01-09-2022       232.0             41.0         71.0       187.0   
9  01-10-2022       240.0             48.0         71.0       198.0   

   Water Melons(1)  
0              NaN  
1             72.0  
2             93.0  
3             74.0  
4             84.0  
5             93.0  
6             93.0  
7             86.0  
8             93.0  
9             97.0  

  new_df = df.fillna(method='ffill')


In [13]:
# Keep only the rows carrying enough information: a row survives when at
# least `min_non_null` of its cells (the date included) are populated.
min_non_null = 4
new_df = df.dropna(axis="index", thresh=min_non_null)

# Display the surviving rows; the original index labels are kept.
print(new_df)


         date  apple(1kg)  banana(1 dozen)  grapes(1kg)  mango(1kg)  \
0  01-01-2022       230.0             46.0         65.0       184.0   
1  01-02-2022       237.0             50.0         72.0       173.0   
3  01-04-2022         NaN             41.0         75.0       175.0   
4  01-05-2022       240.0             48.0          NaN       185.0   
5  01-06-2022       234.0             45.0         66.0       194.0   
7  01-08-2022       236.0             46.0         66.0       187.0   
8  01-09-2022       232.0             41.0         71.0         NaN   
9  01-10-2022       240.0             48.0          NaN       198.0   

   Water Melons(1)  
0              NaN  
1             72.0  
3             74.0  
4             84.0  
5             93.0  
7             86.0  
8             93.0  
9             97.0  


In [14]:
# Strict cleaning: discard every row in which any cell is missing.
new_df = df.dropna(axis="index", how="any")

# Persist the fully populated rows; index=False (as instructed) keeps the
# row labels out of the file.
new_df.to_csv("final_data.csv", index=False)

# Display what was written.
print(new_df)


         date  apple(1kg)  banana(1 dozen)  grapes(1kg)  mango(1kg)  \
1  01-02-2022       237.0             50.0         72.0       173.0   
5  01-06-2022       234.0             45.0         66.0       194.0   
7  01-08-2022       236.0             46.0         66.0       187.0   

   Water Melons(1)  
1             72.0  
5             93.0  
7             86.0  


In [15]:
# Save the cleaned data to final_data.csv.
# Writing the raw `df` here would overwrite the file with rows that still
# contain nulls, so the null-free rows are derived from `df` directly; this
# also keeps the cell independent of whichever `new_df` is in the kernel.
df.dropna().to_csv("final_data.csv", index=False)
