In [None]:
import pandas as pd

# Location of the input CSV (the path suggests a mounted Google Drive in
# Colab -- adjust when running elsewhere).
file_path = '/content/drive/MyDrive/data.csv'

# 1) Load the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer=file_path)

# 2) Show the basic statistical description of the data
#    (count, mean, std, min, quartiles, max for each numeric column).
print("Basic Statistical Description:", df.describe(), sep="\n")

Basic Statistical Description:
         Duration       Pulse    Maxpulse     Calories
count  169.000000  169.000000  169.000000   164.000000
mean    63.846154  107.461538  134.047337   375.790244
std     42.299949   14.510259   16.450434   266.379919
min     15.000000   80.000000  100.000000    50.300000
25%     45.000000  100.000000  124.000000   250.925000
50%     60.000000  105.000000  131.000000   318.600000
75%     60.000000  111.000000  141.000000   387.600000
max    300.000000  159.000000  184.000000  1860.400000


In [None]:
# 3. Check for null values and replace them with the mean
print("\nChecking for null values:")
print(df.isnull().sum())

# a) Replace null values with the mean of their respective columns.
#    numeric_only=True restricts the mean to numeric columns, so this cell
#    does not raise if the CSV contains a text/date column.
#    The result is rebound to `df` instead of using inplace=True, which keeps
#    the step an explicit "new frame from old frame" transformation.
column_means = df.mean(numeric_only=True)
df = df.fillna(column_means)
print("\nNull values after replacement:")
print(df.isnull().sum())


Checking for null values:
Duration    0
Pulse       0
Maxpulse    0
Calories    5
dtype: int64

Null values after replacement:
Duration    0
Pulse       0
Maxpulse    0
Calories    0
dtype: int64


In [None]:
# 4. Aggregate two columns (Calories and Pulse): for each one compute the
#    minimum, maximum, number of non-null entries and the mean.
aggregated_data = df.loc[:, ['Calories', 'Pulse']].aggregate(
    ['min', 'max', 'count', 'mean']
)
print("\nAggregated Data:", aggregated_data, sep="\n")


Aggregated Data:
          Calories       Pulse
min      50.300000   80.000000
max    1860.400000  159.000000
count   169.000000  169.000000
mean    375.790244  107.461538


In [None]:
# 5. Keep only the rows whose Calories value lies in [500, 1000].
#    Series.between is inclusive on both ends by default, matching >= / <=.
filtered_df_calories = df.loc[df['Calories'].between(500, 1000)]
print("\nRows with calories between 500 and 1000:")
print(filtered_df_calories)


Rows with calories between 500 and 1000:
     Duration  Pulse  Maxpulse  Calories
51         80    123       146     643.1
62        160    109       135     853.0
65        180     90       130     800.4
66        150    105       135     873.4
67        150    107       130     816.0
72         90    100       127     700.0
73        150     97       127     953.2
75         90     98       125     563.2
78        120    100       130     500.4
83        120    100       130     500.0
90        180    101       127     600.1
99         90     93       124     604.1
101        90     90       110     500.0
102        90     90       100     500.0
103        90     90       100     500.4
106       180     90       120     800.3
108        90     90       120     500.3


In [None]:
# 6. Keep only the rows with Calories strictly above 500 AND Pulse strictly
#    below 100 (both bounds are exclusive).
filtered_df_calories_pulse = df.loc[df['Calories'].gt(500) & df['Pulse'].lt(100)]
print("\nRows with calories > 500 and pulse < 100:", filtered_df_calories_pulse, sep="\n")



Rows with calories > 500 and pulse < 100:
     Duration  Pulse  Maxpulse  Calories
65        180     90       130     800.4
70        150     97       129    1115.0
73        150     97       127     953.2
75         90     98       125     563.2
99         90     93       124     604.1
103        90     90       100     500.4
106       180     90       120     800.3
108        90     90       120     500.3


In [None]:
# 7. Build df_modified: a copy of df with the 'Maxpulse' column removed.
#    df itself is left unchanged by this step.
df_modified = df.drop(labels=['Maxpulse'], axis='columns')
print("\nModified DataFrame without 'Maxpulse':")
print(df_modified.head())


Modified DataFrame without 'Maxpulse':
   Duration  Pulse  Calories
0        60    110     409.1
1        60    117     479.0
2        60    103     340.0
3        45    109     282.4
4        45    117     406.0


In [None]:
# 8. Delete 'Maxpulse' from the main dataframe.
#    errors='ignore' makes the cell safe to re-run: once the column is gone,
#    a plain drop would raise KeyError on the second execution.
#    The result is rebound to `df` rather than mutated with inplace=True.
df = df.drop(columns=['Maxpulse'], errors='ignore')
print("\nMain DataFrame after deleting 'Maxpulse':")
print(df.head())


Main DataFrame after deleting 'Maxpulse':
   Duration  Pulse  Calories
0        60    110     409.1
1        60    117     479.0
2        60    103     340.0
3        45    109     282.4
4        45    117     406.0


In [None]:
# 9. Cast the 'Calories' column to an integer dtype.
#    Note: the float -> int cast discards the fractional part (no rounding).
df = df.astype({'Calories': int})
print("\nData type of 'Calories' after conversion:", df.dtypes, sep="\n")


Data type of 'Calories' after conversion:
Duration    int64
Pulse       int64
Calories    int64
dtype: object
