In [3]:
import pandas as pd

Creating the dataframe

In [5]:
# Create a DataFrame
data = {'Name': ['John', 'Anna', 'Peter', 'Linda'],
        'Age': [28, 24, 35, 32],
        'City': ['New York', 'Paris', 'Berlin', 'London']}
df = pd.DataFrame(data)

# Create a Series
age_series = pd.Series([28, 24, 35, 32], name="Age")
df.head()

Unnamed: 0,Name,Age,City
0,John,28,New York
1,Anna,24,Paris
2,Peter,35,Berlin
3,Linda,32,London


Indexing and Selecting Data:

In [6]:
# Accessing rows using .loc[] and .iloc[]
print(df.loc[1])  # Row with index 1 (uses label-based indexing)
print(df.iloc[1]) # Row at position 1 (uses positional indexing)

# Accessing specific data
print(df['Name']) # Accessing a single column
print(df.loc[0, 'Age']) # Accessing specific value (row 0, column 'Age')


Name     Anna
Age        24
City    Paris
Name: 1, dtype: object
Name     Anna
Age        24
City    Paris
Name: 1, dtype: object
0     John
1     Anna
2    Peter
3    Linda
Name: Name, dtype: object
28


Handling Missing Data:

In [8]:
# Create a DataFrame with missing data
data = {'Name': ['John', 'Anna', 'Peter', None],
        'Age': [28, None, 35, 32],
        'City': ['New York', 'Paris', 'Berlin', 'London']}
df = pd.DataFrame(data)



In [9]:

# Checking for missing values
print(df.isnull())

    Name    Age   City
0  False  False  False
1  False   True  False
2  False  False  False
3   True  False  False


In [10]:
# Drop rows with any missing data
df_cleaned = df.dropna()

In [11]:

# Fill missing values with a specific value
df_filled = df.fillna({'Name': 'Unknown', 'Age': df['Age'].mean()})
print(df_cleaned)

    Name   Age      City
0   John  28.0  New York
2  Peter  35.0    Berlin


Renaming Columns:

In [13]:
df.rename(columns={'Name': 'Full Name', 'City': 'Location'}, inplace=True)
print(df)


  Full Name   Age  Location
0      John  28.0  New York
1      Anna   NaN     Paris
2     Peter  35.0    Berlin
3      None  32.0    London


3. Operations
Arithmetic Operations:

In [14]:
# Adding a new column based on an existing one
df['Age Next Year'] = df['Age'] + 1
print(df)

  Full Name   Age  Location  Age Next Year
0      John  28.0  New York           29.0
1      Anna   NaN     Paris            NaN
2     Peter  35.0    Berlin           36.0
3      None  32.0    London           33.0


Aggregation:

In [15]:
# Summing up Age column
print(df['Age'].sum())

# Mean, min, and max for Age
print(df['Age'].mean())
print(df['Age'].min())
print(df['Age'].max())


95.0
31.666666666666668
28.0
35.0


GroupBy:

In [16]:
# Group by a column (e.g., City) and calculate the mean of Age
grouped = df.groupby('Location')['Age'].mean()
print(grouped)


Location
Berlin      35.0
London      32.0
New York    28.0
Paris        NaN
Name: Age, dtype: float64


4. Merging and Joining
Merging DataFrames:

In [17]:
# Create two DataFrames
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['John', 'Anna', 'Peter']})
df2 = pd.DataFrame({'ID': [1, 2, 3], 'Age': [28, 24, 35]})

# Merge on 'ID' column
merged_df = pd.merge(df1, df2, on='ID')
print(merged_df)


   ID   Name  Age
0   1   John   28
1   2   Anna   24
2   3  Peter   35


Concatenating DataFrames:

In [18]:
# Concatenating DataFrames vertically
df1 = pd.DataFrame({'ID': [1, 2], 'Name': ['John', 'Anna']})
df2 = pd.DataFrame({'ID': [3, 4], 'Name': ['Peter', 'Linda']})

concatenated_df = pd.concat([df1, df2], ignore_index=True)
print(concatenated_df)

   ID   Name
0   1   John
1   2   Anna
2   3  Peter
3   4  Linda


5. Filtering and Slicing
Filtering:

In [19]:
# Filter rows where Age is greater than 30
filtered_df = df[df['Age'] > 30]
print(filtered_df)


  Full Name   Age Location  Age Next Year
2     Peter  35.0   Berlin           36.0
3      None  32.0   London           33.0


Slicing:

In [20]:
# Select specific rows and columns
sliced_df = df.iloc[1:3, 0:2]  # Rows 1 and 2, Columns 0 and 1
print(sliced_df)


  Full Name   Age
1      Anna   NaN
2     Peter  35.0


6. Sorting
Sorting by Values:

In [21]:
# Sort by Age in ascending order
sorted_df = df.sort_values(by='Age')
print(sorted_df)

# Sort by Age in descending order
sorted_desc_df = df.sort_values(by='Age', ascending=False)
print(sorted_desc_df)


  Full Name   Age  Location  Age Next Year
0      John  28.0  New York           29.0
3      None  32.0    London           33.0
2     Peter  35.0    Berlin           36.0
1      Anna   NaN     Paris            NaN
  Full Name   Age  Location  Age Next Year
2     Peter  35.0    Berlin           36.0
3      None  32.0    London           33.0
0      John  28.0  New York           29.0
1      Anna   NaN     Paris            NaN


Sorting by Index:

In [22]:
# Sort by index
df_sorted_by_index = df.sort_index()
print(df_sorted_by_index)


  Full Name   Age  Location  Age Next Year
0      John  28.0  New York           29.0
1      Anna   NaN     Paris            NaN
2     Peter  35.0    Berlin           36.0
3      None  32.0    London           33.0


7. Handling Dates
DateTime Conversion:

In [23]:
# Convert a column to datetime
df['Date'] = pd.to_datetime(['2025-01-01', '2025-02-02', '2025-03-03', '2025-04-04'])

# Extracting year, month, and day
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
print(df)


  Full Name   Age  Location  Age Next Year       Date  Year  Month  Day
0      John  28.0  New York           29.0 2025-01-01  2025      1    1
1      Anna   NaN     Paris            NaN 2025-02-02  2025      2    2
2     Peter  35.0    Berlin           36.0 2025-03-03  2025      3    3
3      None  32.0    London           33.0 2025-04-04  2025      4    4
