In [2]:
# Getting Started with Pandas
# Objective: Introduce students to using Pandas for data analysis by loading data into Pandas
# DataFrames.

# Question 1: Importing Pandas and Loading a CSV File
# 1. Open your Jupyter Notebook or a Python environment.
# 2. Import the pandas library.
# 3. Load a CSV file into a DataFrame.
import pandas as pd
url = 'https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv'
df = pd.read_csv(url)



# Question 2: Displaying the First Few Rows
# 4. Use the head() method to display the first five rows of the DataFrame.
print(df.head())




# Question 3: Basic Data Information
# 5. Use the info() method to get a concise summary of the DataFrame.
print(df.info())





  Month   "1958"   "1959"   "1960"
0   JAN      340      360      417
1   FEB      318      342      391
2   MAR      362      406      419
3   APR      348      396      461
4   MAY      363      420      472
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Month    12 non-null     object
 1    "1958"  12 non-null     int64 
 2    "1959"  12 non-null     int64 
 3    "1960"  12 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 512.0+ bytes
None


In [4]:
# Data Inspection & Selection
# Objective: Learn how to inspect data and select specific data points.

# Question 1: Inspecting Column Data Types
# 6. Use the dtypes attribute to inspect the data types of each column.

print(df.dtypes)


# Question 2: Selecting Columns
# 7. Select a single column from the DataFrame.
print(df.columns)




# Question 3: Slicing Rows
# 8. Select specific rows using slicing.


print(df[0:3])



Month      object
 "1958"     int64
 "1959"     int64
 "1960"     int64
dtype: object
Index(['Month', ' "1958"', ' "1959"', ' "1960"'], dtype='object')
  Month   "1958"   "1959"   "1960"
0   JAN      340      360      417
1   FEB      318      342      391
2   MAR      362      406      419


In [5]:
# Data Cleaning & Manipulation
# Objective: Practice cleaning data and manipulating DataFrames.

# Question 1: Handling Missing Values
# 9. Use the fillna() method to fill missing values with a specific value.
df_filled = df.fillna(0)
print(df_filled)



# QUestion 2: Renaming Columns
# 10. Change the names of specific columns using rename().

df_renamed = df.rename(columns={'1958': 'Year_1958', '1959': 'Year_1959'})
print(df_renamed.columns)

# Question 3: Dropping Duplicates
# 11. Remove duplicate rows from the DataFrame.

df_no_duplicates = df.drop_duplicates()
print(df_no_duplicates)





   Month   "1958"   "1959"   "1960"
0    JAN      340      360      417
1    FEB      318      342      391
2    MAR      362      406      419
3    APR      348      396      461
4    MAY      363      420      472
5    JUN      435      472      535
6    JUL      491      548      622
7    AUG      505      559      606
8    SEP      404      463      508
9    OCT      359      407      461
10   NOV      310      362      390
11   DEC      337      405      432
Index(['Month', ' "1958"', ' "1959"', ' "1960"'], dtype='object')
   Month   "1958"   "1959"   "1960"
0    JAN      340      360      417
1    FEB      318      342      391
2    MAR      362      406      419
3    APR      348      396      461
4    MAY      363      420      472
5    JUN      435      472      535
6    JUL      491      548      622
7    AUG      505      559      606
8    SEP      404      463      508
9    OCT      359      407      461
10   NOV      310      362      390
11   DEC      337      405      43

In [6]:
# Data Aggregation & Exporting
# Objective: Aggregate data and export the results.

# Question 1: Grouping and Aggregating Data
# 12. Group data by a specific column and calculate the mean for each group.
grouped_mean = df.groupby('Month').mean()
print(grouped_mean)




# Question 2: Exporting Data to CSV
# 13. Export the DataFrame to a new CSV file.


grouped_mean.to_csv('grouped_mean_airtravel.csv')


# Question 3: Aggregating with Multiple Functions
# 14. Apply several aggregate functions to the grouped data.



grouped_agg = df.groupby('Month').agg(['mean', 'sum', 'max', 'min'])
print(grouped_agg)


       "1958"  "1959"  "1960"
Month                        
APR     348.0   396.0   461.0
AUG     505.0   559.0   606.0
DEC     337.0   405.0   432.0
FEB     318.0   342.0   391.0
JAN     340.0   360.0   417.0
JUL     491.0   548.0   622.0
JUN     435.0   472.0   535.0
MAR     362.0   406.0   419.0
MAY     363.0   420.0   472.0
NOV     310.0   362.0   390.0
OCT     359.0   407.0   461.0
SEP     404.0   463.0   508.0
      "1958"                "1959"                "1960"               
        mean  sum  max  min   mean  sum  max  min   mean  sum  max  min
Month                                                                  
APR    348.0  348  348  348  396.0  396  396  396  461.0  461  461  461
AUG    505.0  505  505  505  559.0  559  559  559  606.0  606  606  606
DEC    337.0  337  337  337  405.0  405  405  405  432.0  432  432  432
FEB    318.0  318  318  318  342.0  342  342  342  391.0  391  391  391
JAN    340.0  340  340  340  360.0  360  360  360  417.0  417  417  417
JUL 