In [10]:
import pandas as pd
from google.colab import files

# Function to upload and load the CSV file
def upload_csv():
    """Prompt for a file upload in Colab and load the first file as a DataFrame.

    Returns:
        pandas.DataFrame: the parsed contents of the first uploaded file.

    Raises:
        ValueError: if the upload dialog completes without any file, so the
            caller gets a clear error instead of a silent ``None``.
    """
    import io  # local import: only needed to wrap the uploaded bytes

    # files.upload() opens the browser dialog and returns {file name: bytes}.
    uploaded = files.upload()
    if not uploaded:
        raise ValueError("No file was uploaded; expected a CSV file.")

    # Only the first uploaded file is used. Parse the in-memory bytes so the
    # result does not depend on the name the file was saved under on disk.
    first_name = next(iter(uploaded))
    return pd.read_csv(io.BytesIO(uploaded[first_name]))

# Load the uploaded CSV into the main dataframe
df = upload_csv()

# 2. Summary statistics produced by DataFrame.describe()
basic_stats = df.describe()
print(
    "Basic Statistical Description:\n",
    basic_stats,
)

# 3. Count the missing entries in every column
null_values = df.isna().sum()
print(
    "\nNull Values:\n",
    null_values,
)

# 3a. Replace the null values with the mean
# Assign the filled column back rather than calling fillna(..., inplace=True)
# on df['Calories']: that in-place call acts on an intermediate Series
# (chained assignment), which raises a FutureWarning in pandas 2.x and leaves
# df unchanged once Copy-on-Write is enabled.
# Series.mean() skips NaN by default, so the mean covers known values only.
calories_mean = df['Calories'].mean()
df['Calories'] = df['Calories'].fillna(calories_mean)

# Print result after replacing null values with the mean
print("\nData after replacing null values in 'Calories' with mean:\n", df.to_string())

# 4. Aggregate two columns (Duration and Calories) with min, max, count, mean
summary_stats = ['min', 'max', 'count', 'mean']
aggregation = df.agg({column: summary_stats for column in ('Duration', 'Calories')})
print("\nAggregation:\n", aggregation)

# 5. Keep rows whose Calories value lies in the closed range [500, 1000]
calories_in_range = df['Calories'].between(500, 1000)  # both ends inclusive
df_filtered_1 = df.loc[calories_in_range]
print("\nFiltered DataFrame (Calories between 500 and 1000):\n", df_filtered_1)

# 6. Keep rows with Calories strictly above 500 and Pulse strictly below 100
high_calories = df['Calories'].gt(500)
low_pulse = df['Pulse'].lt(100)
df_filtered_2 = df.loc[high_calories & low_pulse]
print("\nFiltered DataFrame (Calories > 500 and Pulse < 100):\n", df_filtered_2)

# 7. Build "df_modified": a separate dataframe holding every df column except "Maxpulse"
df_modified = df.drop(labels='Maxpulse', axis='columns')
print("\nModified DataFrame without Maxpulse:\n", df_modified.to_string())

# 8. Remove the "Maxpulse" column from the main df itself (mutates df in place)
df.drop(labels='Maxpulse', axis='columns', inplace=True)

# Show the main dataframe once the column is gone
print("\nMain DataFrame after dropping Maxpulse:\n", df.to_string())

# 9. Cast the Calories column to int
# astype(int) discards the fractional part and raises if any NaN is still present.
calories_as_int = df['Calories'].astype(int)
df['Calories'] = calories_as_int

# Show the final state of the main dataframe
print("\nMain DataFrame after converting 'Calories' to int:\n", df.to_string())


Saving data.csv to data (2).csv
Basic Statistical Description:
          Duration       Pulse    Maxpulse     Calories
count  169.000000  169.000000  169.000000   164.000000
mean    63.846154  107.461538  134.047337   375.790244
std     42.299949   14.510259   16.450434   266.379919
min     15.000000   80.000000  100.000000    50.300000
25%     45.000000  100.000000  124.000000   250.925000
50%     60.000000  105.000000  131.000000   318.600000
75%     60.000000  111.000000  141.000000   387.600000
max    300.000000  159.000000  184.000000  1860.400000

Null Values:
 Duration    0
Pulse       0
Maxpulse    0
Calories    5
dtype: int64

Data after replacing null values in 'Calories' with mean:
      Duration  Pulse  Maxpulse     Calories
0          60    110       130   409.100000
1          60    117       145   479.000000
2          60    103       135   340.000000
3          45    109       175   282.400000
4          45    117       148   406.000000
5          60    102       127   