In [None]:
1. Working with Series and DataFrames

In [8]:
import pandas as pd

# --- Series: a one-dimensional labelled array -------------------------------
# Built from a plain Python list, so pandas attaches the default 0..n-1 index.
data_series = [5, 6, 9, 8, 10]
series = pd.Series(data_series)
print("Series from a list:", series, sep="\n")

# --- DataFrame: a two-dimensional table --------------------------------------
# Every dictionary key becomes a column; the equal-length lists supply the rows.
data_frame_dict = {
    'Name': ['Imran', 'Raj', 'Ravi', 'Mansoor'],
    'Age': [25, 35, 23, 20],
}
df = pd.DataFrame(data_frame_dict)
print("\nDataFrame from a dictionary:", df, sep="\n")


Series from a list:
0     5
1     6
2     9
3     8
4    10
dtype: int64

DataFrame from a dictionary:
      Name  Age
0    Imran   25
1      Raj   35
2     Ravi   23
3  Mansoor   20


In [None]:
2. Basic DataFrame Operations

In [9]:
# Indexing with one label returns that column as a Series
# (the column name, "Age", is carried along as the Series name).
ages = df['Age']
print("\nSelected column (Age):", ages, sep="\n")

# Indexing with a list of labels returns a DataFrame that holds
# just those columns, in the order they are listed.
subset_df = df[['Name', 'Age']]
print("\nSubset of DataFrame:", subset_df, sep="\n")



Selected column (Age):
0    25
1    35
2    23
3    20
Name: Age, dtype: int64

Subset of DataFrame:
      Name  Age
0    Imran   25
1      Raj   35
2     Ravi   23
3  Mansoor   20


In [None]:
3. Filtering and Modifying Data

In [10]:
# Boolean-mask filtering: keep only the rows for which the comparison is True.
# No age in this frame exceeds 40, so the result is an empty DataFrame.
filtered_df = df.loc[df['Age'] > 40]
print("\nFiltered DataFrame (Age > 40):", filtered_df, sep="\n")

# Label-based assignment via .loc[row_label, column_label] updates df in place:
# the row labelled 0 has its Age set to 35.
df.loc[0, 'Age'] = 35
print("\nDataFrame after modifying age of the first entry:", df, sep="\n")



Filtered DataFrame (Age > 40):
Empty DataFrame
Columns: [Name, Age]
Index: []

DataFrame after modifying age of the first entry:
      Name  Age
0    Imran   35
1      Raj   35
2     Ravi   23
3  Mansoor   20


In [None]:
4. Handling Missing Data

In [11]:
# None entries are stored as NaN, which is why both columns print as floats.
df_missing = pd.DataFrame({'A': [5, 6, None, 8], 'B': [None, 7, 8, 9]})

# Show the raw frame, then a same-shaped boolean mask that is True
# wherever a value is missing.
print("\nDataFrame with missing values:", df_missing, sep="\n")
print("\nChecking for missing data:", df_missing.isnull(), sep="\n")

# fillna(0) hands back a new frame with every NaN replaced by 0;
# the result is bound to a separate name, so df_missing is left untouched.
df_filled = df_missing.fillna(0)
print("\nDataFrame after filling missing values with 0:", df_filled, sep="\n")



DataFrame with missing values:
     A    B
0  5.0  NaN
1  6.0  7.0
2  NaN  8.0
3  8.0  9.0

Checking for missing data:
       A      B
0  False   True
1  False  False
2   True  False
3  False  False

DataFrame after filling missing values with 0:
     A    B
0  5.0  0.0
1  6.0  7.0
2  0.0  8.0
3  8.0  9.0


In [None]:
5. Data Type Conversion

In [12]:
# Cast the Age column to float and write it back into df, so the
# values are shown with a decimal point from here on.
df['Age'] = df['Age'].astype(float)
print("\nDataFrame with 'Age' converted to float:", df, sep="\n")



DataFrame with 'Age' converted to float:
      Name   Age
0    Imran  35.0
1      Raj  35.0
2     Ravi  23.0
3  Mansoor  20.0


In [None]:
6. Data Analysis

In [13]:
# describe() reports count, mean, std, min, quartiles and max
# for the numeric column(s) of the frame.
print("\nSummary statistics of the DataFrame:", df.describe(), sep="\n")

# One group per distinct Age, then the mean Age inside each group.
# Because the grouping key and the aggregated column are the same,
# each group's mean is simply its own key value.
grouped_df = df.groupby('Age').agg({'Age': 'mean'})
print("\nGrouped data by Age with mean Age:", grouped_df, sep="\n")



Summary statistics of the DataFrame:
             Age
count   4.000000
mean   28.250000
std     7.889867
min    20.000000
25%    22.250000
50%    29.000000
75%    35.000000
max    35.000000

Grouped data by Age with mean Age:
       Age
Age       
20.0  20.0
23.0  23.0
35.0  35.0


In [None]:
7. Merging and Concatenating DataFrames

In [14]:
# Two small frames sharing a 'Key' column: 'C' appears only in df1, 'D' only in df2.
df1 = pd.DataFrame({'Key': ['A', 'B', 'C'], 'Value': [1, 2, 3]})
df2 = pd.DataFrame({'Key': ['A', 'B', 'D'], 'Value': [4, 5, 6]})

# Outer join on 'Key': keys from both sides are kept, and a side with no match
# is filled with NaN. The clashing 'Value' columns get the _x / _y suffixes.
merged_df = pd.merge(df1, df2, how='outer', on='Key')
print("\nMerged DataFrame:", merged_df, sep="\n")

# Row-wise stacking of the two frames; ignore_index=True renumbers the
# combined rows 0..n-1 instead of repeating each frame's own labels.
concatenated_df = pd.concat([df1, df2], ignore_index=True)
print("\nConcatenated DataFrame:", concatenated_df, sep="\n")



Merged DataFrame:
  Key  Value_x  Value_y
0   A      1.0      4.0
1   B      2.0      5.0
2   C      3.0      NaN
3   D      NaN      6.0

Concatenated DataFrame:
  Key  Value
0   A      1
1   B      2
2   C      3
3   A      4
4   B      5
5   D      6


In [None]:
Application in Data Science
Advantages of Using Pandas

Efficiency and Performance: Provides fast, vectorized functions for data manipulation.
Ease of Use: Offers a concise, consistent API for loading, selecting, reshaping, and summarizing data.
Integration: Works well with other libraries, such as Matplotlib and Seaborn for data visualization and scikit-learn for data analysis and data mining.
Handling Large Datasets: Well suited to processing large datasets that fit in memory.
Common Use Cases

Data Cleaning: Imputing missing values, removing duplicates, and converting data types.
Exploratory Data Analysis (EDA): Data summarization and data visualization.
Time Series Analysis: Management of time series data in finance and economics.
Conclusion
Pandas plays a crucial role in data science, providing robust capabilities for manipulating and cleaning data ahead of analysis. Once you have worked through the examples above, you will be ready to handle and manage data with Pandas.