In [1]:
import pandas as pd

# Sample frame shared by the five examples below: two numeric columns
# ('A', 'B') and one text column ('C').
data = {
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
    'C': ['one', 'two', 'three', 'four', 'five']
}
df = pd.DataFrame(data)

# 1. head() - Returns the first n rows of the DataFrame
print("head() example:")
print(df.head(3))

# 2. describe() - Generates descriptive statistics
print("\ndescribe() example:")
print(df.describe())

# 3. drop() - Removes rows or columns (returns a new frame; `df` keeps column 'C')
print("\ndrop() example:")
print(df.drop(columns=['C']))

# 4. groupby() - Groups the data based on some criteria
# numeric_only=True keeps the text column 'C' out of the aggregation. From
# pandas 2.0 on, sum() no longer drops non-numeric columns by default, so
# without this flag 'C' would be "summed" too and the printed table would
# gain an extra column.
grouped_sums = df.groupby('A').sum(numeric_only=True)
print("\ngroupby() example:")
print(grouped_sums)

# 5. merge() - Merges two DataFrames
# df2 shares key column 'A' with df; only the keys 3, 4 and 5 occur in both,
# so the inner join keeps exactly those rows.
df2 = pd.DataFrame({
    'A': [3, 4, 5, 6, 7],
    'D': [100, 200, 300, 400, 500]
})
print("\nmerge() example:")
print(pd.merge(df, df2, on='A', how='inner'))


head() example:
   A   B      C
0  1  10    one
1  2  20    two
2  3  30  three

describe() example:
              A          B
count  5.000000   5.000000
mean   3.000000  30.000000
std    1.581139  15.811388
min    1.000000  10.000000
25%    2.000000  20.000000
50%    3.000000  30.000000
75%    4.000000  40.000000
max    5.000000  50.000000

drop() example:
   A   B
0  1  10
1  2  20
2  3  30
3  4  40
4  5  50

groupby() example:
    B
A    
1  10
2  20
3  30
4  40
5  50

merge() example:
   A   B      C    D
0  3  30  three  100
1  4  40   four  200
2  5  50   five  300


In [2]:
def reindex_dataframe(df):
    """Relabel the rows of ``df`` with consecutive odd numbers: 1, 3, 5, ...

    The new index is assigned on ``df`` itself, so the caller's frame is
    modified; that same object is returned.
    """
    row_count = len(df)
    first_label, step = 1, 2
    # One label per row; the stop value is exclusive.
    odd_labels = range(first_label, first_label + step * row_count, step)
    df.index = odd_labels
    return df

# Three-row, three-column frame used to demonstrate the re-indexing helper.
sample_columns = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
df = pd.DataFrame(sample_columns)

# Apply the helper and show the frame with its new row labels.
reindexed_df = reindex_dataframe(df)
print(reindexed_df)


   A  B  C
1  1  4  7
3  2  5  8
5  3  6  9


In [3]:
def sum_first_three(df):
    """Print the sum of the first three entries of the 'Values' column.

    "First three" means the first three rows by position. ``.iloc`` makes that
    explicit, so the result does not depend on what the index labels are.
    If the column has fewer than three rows, all of them are summed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Values' column.

    Returns
    -------
    None
        The sum is printed, not returned.
    """
    first_three_total = df['Values'].iloc[:3].sum()
    print(first_three_total)

# Single-column demo frame; its first three values are 10, 20 and 30.
sample_values = [10, 20, 30, 40, 50]
df = pd.DataFrame({'Values': sample_values})

# The helper prints the result itself, so there is nothing to capture here.
sum_first_three(df)


60


In [4]:
def add_word_count_column(df):
    """Add a 'Word_Count' column holding the number of words in 'Text'.

    Words are whitespace-separated tokens. Non-string values are converted
    with ``str()`` before counting. Missing entries (``None``, NaN,
    ``pd.NA``) count as 0 words.

    The column is added to ``df`` itself and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Text' column.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the extra 'Word_Count' column.
    """
    def _count_words(text):
        # Guard against missing values first: str() would otherwise turn them
        # into the literal text "None"/"nan" and report one word.
        is_missing = (
            text is None
            or text is pd.NA
            or (isinstance(text, float) and text != text)  # NaN != NaN
        )
        if is_missing:
            return 0
        return len(str(text).split())

    df['Word_Count'] = df['Text'].apply(_count_words)
    return df

# Three short sentences of two, three and four words respectively.
sample_sentences = ['Hello world', 'Pandas is great', 'Data science is fun']
df = pd.DataFrame({'Text': sample_sentences})

# Append the per-row word count and display the result.
df_with_word_count = add_word_count_column(df)
print(df_with_word_count)


                  Text  Word_Count
0          Hello world           2
1      Pandas is great           3
2  Data science is fun           4


In [5]:
# Frame with three rows and two columns.
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})

# size is the total number of cells; shape is the (rows, columns) pair.
total_cells = df.size
dimensions = df.shape
print("Size:", total_cells)
print("Shape:", dimensions)


Size: 6
Shape: (3, 2)


Q6. Which function of pandas do we use to read an Excel file?
The function used to read an Excel file in pandas is pd.read_excel().

In [6]:
def add_username_column(df):
    """Add a 'Username' column: the part of 'Email' before the first '@'.

    Uses the vectorised ``.str`` accessor rather than a per-row lambda, so a
    missing email yields a missing username instead of raising
    ``AttributeError``. An address without '@' is returned unchanged.

    The column is added to ``df`` itself and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an 'Email' column of strings.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the extra 'Username' column.
    """
    # n=1: only the first '@' matters, so stop splitting after it.
    email_parts = df['Email'].str.split('@', n=1)
    df['Username'] = email_parts.str[0]
    return df

# Two example addresses on different domains.
sample_emails = ['john.doe@example.com', 'jane.smith@test.com']
df = pd.DataFrame({'Email': sample_emails})

# Derive the username column and display the result.
df_with_username = add_username_column(df)
print(df_with_username)


                  Email    Username
0  john.doe@example.com    john.doe
1   jane.smith@test.com  jane.smith


In [7]:
def select_rows(df, a_min=5, b_max=10):
    """Return the rows where column 'A' > ``a_min`` and column 'B' < ``b_max``.

    Both comparisons are strict. With the default thresholds this selects
    rows with A greater than 5 and B less than 10.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing columns 'A' and 'B'.
    a_min : scalar, default 5
        Exclusive lower bound for column 'A'.
    b_max : scalar, default 10
        Exclusive upper bound for column 'B'.

    Returns
    -------
    pandas.DataFrame
        The matching rows, keeping their original index labels.
    """
    matches = (df['A'] > a_min) & (df['B'] < b_max)
    return df[matches]

# Five-row demo frame, written row by row as (A, B, C).
df = pd.DataFrame(
    [[3, 5, 1], [8, 2, 7], [6, 9, 4], [2, 3, 5], [9, 1, 2]],
    columns=['A', 'B', 'C'],
)

# Filter the frame and display the surviving rows.
selected_df = select_rows(df)
print(selected_df)


   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [8]:
def calculate_statistics(df):
    """Summarise the 'Values' column of ``df``.

    Returns a 3-tuple ``(mean, median, standard deviation)``. Each figure
    comes from the pandas Series method of the same name, called with its
    default arguments.
    """
    values = df['Values']
    return values.mean(), values.median(), values.std()

# Evenly spaced demo values.
sample_values = [10, 20, 30, 40, 50]
df = pd.DataFrame({'Values': sample_values})

# Unpack the three summary figures and report them on one line.
mean_val, median_val, std_dev_val = calculate_statistics(df)
print(f"Mean: {mean_val}, Median: {median_val}, Std Dev: {std_dev_val}")


Mean: 30.0, Median: 30.0, Std Dev: 15.811388300841896


In [9]:
def add_moving_average(df):
    """Add a 'MovingAverage' column: the trailing 7-row mean of 'Sales'.

    ``min_periods=1`` means the first six rows are averaged over however many
    rows are available so far, rather than being left as NaN.

    The column is added to ``df`` itself and the same object is returned.
    """
    sales = df['Sales']
    trailing_window = sales.rolling(window=7, min_periods=1)
    df['MovingAverage'] = trailing_window.mean()
    return df

# Ten consecutive daily sales figures starting on 2023-01-01.
daily_dates = pd.date_range(start='2023-01-01', periods=10, freq='D')
daily_sales = [100, 200, 150, 300, 400, 500, 600, 700, 800, 900]
data = {'Date': daily_dates, 'Sales': daily_sales}
df = pd.DataFrame(data)

# Append the moving average and display the result.
df_with_moving_average = add_moving_average(df)
print(df_with_moving_average)


        Date  Sales  MovingAverage
0 2023-01-01    100     100.000000
1 2023-01-02    200     150.000000
2 2023-01-03    150     150.000000
3 2023-01-04    300     187.500000
4 2023-01-05    400     230.000000
5 2023-01-06    500     275.000000
6 2023-01-07    600     321.428571
7 2023-01-08    700     407.142857
8 2023-01-09    800     492.857143
9 2023-01-10    900     600.000000


In [10]:
def add_weekday_column(df):
    """Add a 'Weekday' column with the day name of each 'Date' entry.

    'Date' is passed through ``pd.to_datetime`` first, so the function also
    works when the column holds date strings rather than datetime values.
    The 'Date' column itself is left as it was.

    The column is added to ``df`` itself and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Date' column of datetimes or parseable strings.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the extra 'Weekday' column.
    """
    # .dt is only available on datetime-like data; coerce before using it.
    dates = pd.to_datetime(df['Date'])
    df['Weekday'] = dates.dt.day_name()
    return df

# Five consecutive days starting on 2023-01-01.
first_five_days = pd.date_range(start='2023-01-01', periods=5, freq='D')
df = pd.DataFrame({'Date': first_five_days})

# Append the weekday names and display the result.
df_with_weekday = add_weekday_column(df)
print(df_with_weekday)


        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday


In [11]:
def select_date_range(df, start='2023-01-01', end='2023-01-31'):
    """Return the rows whose 'Date' falls between ``start`` and ``end``.

    Both bounds are inclusive at day granularity. Each 'Date' is truncated to
    midnight before comparing, so a timestamp such as ``2023-01-31 12:00``
    still counts as part of the end day. Comparing the raw timestamps against
    the end date would drop everything after midnight on that day.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Date' column of datetimes or parseable strings.
    start : str or datetime-like, default '2023-01-01'
        First day to include. Pass a plain date (no time of day).
    end : str or datetime-like, default '2023-01-31'
        Last day to include. Pass a plain date (no time of day).

    Returns
    -------
    pandas.DataFrame
        The matching rows, keeping their original index labels.
    """
    # Work on a day-truncated copy of the column; df['Date'] is not modified.
    day = pd.to_datetime(df['Date']).dt.normalize()
    in_range = (day >= start) & (day <= end)
    return df[in_range]

# Forty daily rows starting on 2022-12-25, numbered 0..39, so the frame
# extends before and after January 2023.
forty_days = pd.date_range(start='2022-12-25', periods=40, freq='D')
data = {'Date': forty_days, 'Values': range(40)}
df = pd.DataFrame(data)

# Keep only the rows inside the date range and display them.
selected_df = select_date_range(df)
print(selected_df)


         Date  Values
7  2023-01-01       7
8  2023-01-02       8
9  2023-01-03       9
10 2023-01-04      10
11 2023-01-05      11
12 2023-01-06      12
13 2023-01-07      13
14 2023-01-08      14
15 2023-01-09      15
16 2023-01-10      16
17 2023-01-11      17
18 2023-01-12      18
19 2023-01-13      19
20 2023-01-14      20
21 2023-01-15      21
22 2023-01-16      22
23 2023-01-17      23
24 2023-01-18      24
25 2023-01-19      25
26 2023-01-20      26
27 2023-01-21      27
28 2023-01-22      28
29 2023-01-23      29
30 2023-01-24      30
31 2023-01-25      31
32 2023-01-26      32
33 2023-01-27      33
34 2023-01-28      34
35 2023-01-29      35
36 2023-01-30      36
37 2023-01-31      37
