In [1]:
# Getting Started with Pandas
# Goal: load a CSV file into a pandas DataFrame and take a first look at it.

# Question 1: Importing Pandas and Loading a CSV File
#   1. Open a Jupyter Notebook (or any other Python environment).
#   2. Import the pandas library.
#   3. Load a CSV file into a DataFrame.

import pandas as pd

# The file is parsed with ';' as the field separator instead of the default ','.
df = pd.read_csv('winequality-red.csv', sep=';')


# Question 2: Displaying the First Few Rows
#   4. Show the first five rows of the DataFrame with head().

print(df.head(5))


# Question 3: Basic Data Information
#   5. Print a concise summary of the DataFrame with info().
#      info() writes its report itself, so no print() is needed here.

df.info()





   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5  
2      9.8        5 

In [2]:
# Data Inspection & Selection
# Goal: inspect a DataFrame and pick out specific columns and rows.

# Question 1: Inspecting Column Data Types
#   6. Read the dtypes attribute to see the data type of every column.

print(df.dtypes)


# Question 2: Selecting Columns
#   7. Pull a single column out of the DataFrame (the result is a Series).

print(df.loc[:, 'fixed acidity'])


# Question 3: Slicing Rows
#   8. Pick specific rows with a slice; here, the first ten rows by position.

print(df.iloc[:10])




fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                   int64
dtype: object
0        7.4
1        7.8
2        7.8
3       11.2
4        7.4
        ... 
1594     6.2
1595     5.9
1596     6.3
1597     5.9
1598     6.0
Name: fixed acidity, Length: 1599, dtype: float64
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4   

In [3]:
# Data Cleaning & Manipulation
# Objective: Practice cleaning data and manipulating DataFrames.
#
# Every step rebinds `df` to the frame returned by the method instead of
# using inplace=True, so all three steps follow the same pattern and none of
# them mutates the previous frame object behind the reader's back.

# Value substituted for every missing entry, in every column.
FILL_VALUE = 5

# Question 1: Handling Missing Values
# 9. Use the fillna() method to fill missing values with a specific value.

df = df.fillna(FILL_VALUE)


# Question 2: Renaming Columns
# 10. Change the names of specific columns using rename().
#     From this point on the column is called 'fa', not 'fixed acidity'.

df = df.rename(columns={'fixed acidity': 'fa'})


# Question 3: Dropping Duplicates
# 11. Remove duplicate rows from the DataFrame.
#     Rows count as duplicates only when every column matches; the first
#     occurrence is kept and the surviving rows keep their original labels.

df = df.drop_duplicates()





In [4]:
# Data Aggregation & Exporting
# Objective: Aggregate data and export the results.

# Question 1: Grouping and Aggregating Data
# 12. Group data by a specific column and calculate the mean for each group.
#     The result is printed explicitly: a bare expression in the middle of a
#     cell is evaluated but never shown, because only the last expression of
#     a cell is displayed.

print(df.groupby('alcohol').mean())


# Question 2: Exporting Data to CSV
# 13. Export the DataFrame to a new CSV file.
#     The file gets a .csv extension so it is recognisable as CSV, and
#     index=False keeps the row labels from being written as an extra,
#     unnamed first column.

df.to_csv('NEW_WINE_DATASET.csv', index=False)


# Question 3: Aggregating with Multiple Functions
# 14. Apply several aggregate functions to the grouped data.
#     Named aggregation: each keyword is an output column name, and each
#     tuple is (source column, aggregate function).

df.groupby('alcohol').agg(
    density=('density', 'mean'),
    ph=('pH', 'median'),
)




Unnamed: 0_level_0,desnity,ph
alcohol,Unnamed: 1_level_1,Unnamed: 2_level_1
8.400000,1.000100,3.010
8.500000,0.999140,3.150
8.700000,0.997750,3.330
8.800000,1.002420,3.160
9.000000,0.998387,3.300
...,...,...
13.500000,0.991500,3.390
13.566667,0.991820,3.540
13.600000,0.991722,3.515
14.000000,0.991863,3.690
