In [1]:
import pandas as pd
import numpy as np

# Data Creation

In [2]:
# --- Series -----------------------------------------------------------------
# A one-dimensional labelled array; `name` is shown in the footer of its repr.
data_series = pd.Series(data=[1, 2, 3, 4, 5], name='Numbers')
print("Series:")
print(data_series)

# --- DataFrame from a dict --------------------------------------------------
# Each key becomes a column label and each list supplies that column's values.
data_dict = dict(
    A=[1, 2, 3],
    B=[4, 5, 6],
    C=[7, 8, 9],
)
data_df = pd.DataFrame(data=data_dict)
print("\nDataFrame from dictionary:")
print(data_df)

# --- DataFrame with explicit row labels and column order --------------------
# `index` replaces the default 0..n-1 row labels; `columns` pins the column
# order (here simply the dict's own key order).
data_df_indexed = pd.DataFrame(
    data=data_dict,
    index=['row1', 'row2', 'row3'],
    columns=list(data_dict),
)
print("\nDataFrame with custom index and columns:")
print(data_df_indexed)


Series:
0    1
1    2
2    3
3    4
4    5
Name: Numbers, dtype: int64

DataFrame from dictionary:
   A  B  C
0  1  4  7
1  2  5  8
2  3  6  9

DataFrame with custom index and columns:
      A  B  C
row1  1  4  7
row2  2  5  8
row3  3  6  9


# Data Inspection

In [8]:
# Peek at the top of the frame. The bare last expression is what the notebook
# renders as the cell's output table.
print("First 2 rows of DataFrame:")
data_df.head(n=2)

First 2 rows of DataFrame:


Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8


In [9]:
# Peek at the bottom of the frame; the row labels shown are the original ones
# (1 and 2), not renumbered from zero.
print("Last 2 rows of DataFrame:")
data_df.tail(n=2)

Last 2 rows of DataFrame:


Unnamed: 0,A,B,C
1,2,5,8
2,3,6,9


In [10]:
# info() prints its summary itself (index range, per-column non-null counts
# and dtypes, memory usage), so this cell produces text output only.
print("DataFrame info:")
data_df.info()

DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       3 non-null      int64
 1   B       3 non-null      int64
 2   C       3 non-null      int64
dtypes: int64(3)
memory usage: 204.0 bytes


In [11]:
# Summary statistics for every numeric column: count, mean, std, min, the
# three quartiles and max. The quartiles are spelled out here; they are the
# same ones describe() reports when no percentiles are given.
print("DataFrame description:")
data_df.describe(percentiles=[0.25, 0.5, 0.75])

DataFrame description:


Unnamed: 0,A,B,C
count,3.0,3.0,3.0
mean,2.0,5.0,8.0
std,1.0,1.0,1.0
min,1.0,4.0,7.0
25%,1.5,4.5,7.5
50%,2.0,5.0,8.0
75%,2.5,5.5,8.5
max,3.0,6.0,9.0


# Data Selection and Filtering

In [12]:
# Pull out one column by label. Selecting a single label yields a Series
# whose name is that column label.
print("Column 'A':")
data_df.loc[:, 'A']

Column 'A':


0    1
1    2
2    3
Name: A, dtype: int64

In [13]:
# Pull out several columns at once. Passing a list of labels yields a
# DataFrame, with the columns in the order listed.
print("Columns 'A' and 'B':")
data_df.loc[:, ['A', 'B']]

Columns 'A' and 'B':


Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [14]:
# Look up one row by its index *label* (not its position). The row comes back
# as a Series indexed by the column labels and named after the row label.
print("Row with index 1:")
data_df.loc[1, :]

Row with index 1:


A    2
B    5
C    8
Name: 1, dtype: int64

In [15]:
# Boolean filtering: the comparison builds a True/False mask with one entry
# per row, and only the rows where the mask is True are kept.
print("Rows where column 'A' > 1:")
data_df.loc[data_df['A'] > 1]


Rows where column 'A' > 1:


Unnamed: 0,A,B,C
1,2,5,8
2,3,6,9


# Data Manipulation

In [16]:
# Append column 'D' by item assignment; the list supplies one value per
# existing row. Assigning to the same label again simply overwrites it.
d_values = [10, 11, 12]
data_df['D'] = d_values
print("DataFrame with new column 'D':")
data_df

DataFrame with new column 'D':


Unnamed: 0,A,B,C,D
0,1,4,7,10
1,2,5,8,11
2,3,6,9,12


In [17]:
# Renaming columns
# rename() returns a new frame, so the result is rebound to the same name
# instead of using inplace=True (which offers no benefit and blocks method
# chaining). Labels in the mapping that are not present are ignored by
# default, so re-running this cell is harmless.
data_df = data_df.rename(columns={'A': 'Alpha', 'B': 'Beta'})
print("DataFrame with renamed columns:")
data_df


DataFrame with renamed columns:


Unnamed: 0,Alpha,Beta,C,D
0,1,4,7,10
1,2,5,8,11
2,3,6,9,12


In [18]:
# Dropping a column
# drop() returns a new frame; rebinding replaces inplace=True.
# errors='ignore' keeps the cell re-runnable: once 'D' has been removed, a
# second run would otherwise raise KeyError because the label no longer exists.
data_df = data_df.drop(columns=['D'], errors='ignore')
print("DataFrame after dropping column 'D':")
data_df

DataFrame after dropping column 'D':


Unnamed: 0,Alpha,Beta,C
0,1,4,7
1,2,5,8
2,3,6,9


In [19]:
# Setting a new index
# set_index() moves 'Alpha' out of the columns and into the row index, so
# running it a second time would raise KeyError (the column is gone). The
# index-name check makes the cell safe to re-run while still raising if
# 'Alpha' is genuinely missing. The result is rebound rather than mutated
# with inplace=True.
if data_df.index.name != 'Alpha':
    data_df = data_df.set_index('Alpha')
print("DataFrame with new index:")
data_df

DataFrame with new index:


Unnamed: 0_level_0,Beta,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4,7
2,5,8
3,6,9


In [20]:
# Resetting index
# reset_index() turns the current index back into a regular column and
# restores the default 0..n-1 row labels. On an already-default (unnamed)
# index it would instead insert a spurious 'index' column, so the reset is
# only performed while a named index is in place; this keeps the cell
# re-runnable. The result is rebound rather than mutated with inplace=True.
if data_df.index.name is not None:
    data_df = data_df.reset_index()
print("DataFrame after resetting index:")
data_df

DataFrame after resetting index:


Unnamed: 0,Alpha,Beta,C
0,1,4,7
1,2,5,8
2,3,6,9


# Aggregation and Grouping

In [21]:
# Split the rows by their 'Alpha' value and sum the remaining columns within
# each group. The group keys become the index of the result.
grouped_df = data_df.groupby(by='Alpha').sum()
print("Grouped DataFrame with sum aggregation:")
grouped_df

Grouped DataFrame with sum aggregation:


Unnamed: 0_level_0,Beta,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4,7
2,5,8
3,6,9


In [22]:
# Column-specific aggregations over the whole frame: 'Beta' gets both its sum
# and its mean, 'Alpha' only a count. The result has one row per function
# name, and a cell is NaN where that function was not requested for the column.
agg_df = data_df.aggregate({'Beta': ['sum', 'mean'], 'Alpha': 'count'})
print("Aggregated DataFrame with sum, mean, and count:")
agg_df

Aggregated DataFrame with sum, mean, and count:


Unnamed: 0,Beta,Alpha
sum,15.0,
mean,5.0,
count,,3.0


# Missing Data Handling

In [23]:
# Build a small frame with one gap in each column. np.nan is a float, so both
# columns are stored as floating point (visible as 1.0, 2.0, ... in the output).
data_with_na = pd.DataFrame(
    data=dict(
        A=[1, 2, np.nan, 4],
        B=[5, np.nan, 7, 8],
    )
)
print("DataFrame with missing values:")
data_with_na


DataFrame with missing values:


Unnamed: 0,A,B
0,1.0,5.0
1,2.0,
2,,7.0
3,4.0,8.0


In [24]:
# Fill each column with its own replacement value: gaps in 'A' become 0, gaps
# in 'B' become the mean of B's observed values. fillna() returns a new frame
# here, so data_with_na keeps its gaps for the next cell.
filled_df = data_with_na.fillna({'A': 0, 'B': data_with_na['B'].mean()})
print("DataFrame after filling missing values:")
filled_df

DataFrame after filling missing values:


Unnamed: 0,A,B
0,1.0,5.0
1,2.0,6.666667
2,0.0,7.0
3,4.0,8.0


In [25]:
# Discard every row that contains at least one missing value. The surviving
# rows keep their original labels (0 and 3); they are not renumbered.
dropped_df = data_with_na.dropna(axis=0, how='any')
print("DataFrame after dropping missing values:")
dropped_df

DataFrame after dropping missing values:


Unnamed: 0,A,B
0,1.0,5.0
3,4.0,8.0


# Merging and Joining

In [26]:
# A second frame to combine with data_df. It shares the 'Alpha' key column;
# keys 1 and 2 also occur in data_df, key 4 does not.
data_df2 = pd.DataFrame(
    data=dict(
        Alpha=[1, 2, 4],
        E=['A', 'B', 'C'],
    )
)
print("Second DataFrame for merging:")
data_df2


Second DataFrame for merging:


Unnamed: 0,Alpha,E
0,1,A
1,2,B
2,4,C


In [27]:
# Database-style join on the shared 'Alpha' key. With how='inner' only keys
# present in both frames survive, so Alpha 3 (left only) and Alpha 4 (right
# only) are absent from the result.
merged_df = data_df.merge(data_df2, how='inner', on='Alpha')
print("Merged DataFrame:")
merged_df

Merged DataFrame:


Unnamed: 0,Alpha,Beta,C,E
0,1,4,7,A
1,2,5,8,B


In [28]:
# Place the two frames side by side, aligning rows on their index labels
# rather than on any key column. Both inputs carry an 'Alpha' column, so the
# result contains two columns with that same label.
concat_df = pd.concat([data_df, data_df2], axis='columns')
print("Concatenated DataFrame:")
concat_df

Concatenated DataFrame:


Unnamed: 0,Alpha,Beta,C,Alpha.1,E
0,1,4,7,1,A
1,2,5,8,2,B
2,3,6,9,4,C
