### Q1. List any five functions of the pandas library with execution.

In [1]:
#A1.
import pandas as pd
# Small 3x3 frame with no missing values, shared by the five examples below.
data = dict(A=[1, 2, 3], B=[4, 5, 6], C=[7, 8, 9])
df = pd.DataFrame(data)

# 1. head(): show only the leading rows (the first two here).
print("head():")
print(df.head(n=2))

# 2. describe(): count, mean, std, min, quartiles and max for each column.
print("\ndescribe():")
print(df.describe())

# 3. groupby(): group rows by column 'A' and sum the remaining columns.
#    Every value of 'A' is unique here, so each group holds a single row.
grouped = df.groupby(by='A').sum()
print("\ngroupby():")
print(grouped)

# 4. dropna(): drop rows that contain missing values.
#    This frame has none, so the result matches df.
print("\ndropna():")
print(df.dropna())

# 5. merge(): database-style left join on 'A'; rows of df with no match in
#    df2 get NaN in the columns that come from df2.
df2 = pd.DataFrame(dict(A=[1, 2], D=[10, 20]))
merged_df = df.merge(df2, on='A', how='left')
print("\nmerge():")
print(merged_df)

head():
   A  B  C
0  1  4  7
1  2  5  8

describe():
         A    B    C
count  3.0  3.0  3.0
mean   2.0  5.0  8.0
std    1.0  1.0  1.0
min    1.0  4.0  7.0
25%    1.5  4.5  7.5
50%    2.0  5.0  8.0
75%    2.5  5.5  8.5
max    3.0  6.0  9.0

groupby():
   B  C
A      
1  4  7
2  5  8
3  6  9

dropna():
   A  B  C
0  1  4  7
1  2  5  8
2  3  6  9

merge():
   A  B  C     D
0  1  4  7  10.0
1  2  5  8  20.0
2  3  6  9   NaN


### Q2. Given a Pandas DataFrame df with columns 'A', 'B', and 'C', write a Python function to re-index the DataFrame with a new index that starts from 1 and increments by 2 for each row.

In [2]:
#A2.
import pandas as pd

def reindex_dataframe(df):
    """Relabel the rows of ``df`` with the odd numbers 1, 3, 5, ...

    The index is assigned on the DataFrame that was passed in (no copy is
    made), and that same object is returned.
    """
    row_count = len(df)
    # One label per row: start at 1, step by 2, stop just past the last label.
    df.index = pd.RangeIndex(start=1, stop=2 * row_count + 1, step=2)
    return df


# Sample DataFrame
data = dict(A=[10, 20, 30], B=[40, 50, 60], C=[70, 80, 90])
df = pd.DataFrame(data)
print(reindex_dataframe(df))

    A   B   C
1  10  40  70
3  20  50  80
5  30  60  90


### Q3. You have a Pandas DataFrame df with a column named 'Values'. Write a Python function that iterates over the DataFrame and calculates the sum of the first three values in the 'Values' column.

In [3]:
#A3. 
import pandas as pd

def sum_first_three_values(df):
    """Return the sum of the first three entries of the 'Values' column.

    When the column has fewer than three rows, only the available rows are
    summed.
    """
    leading_values = df['Values'].iloc[:3]
    return leading_values.sum()


# Sample DataFrame
data = dict(Values=[10, 20, 30, 40, 50])
df = pd.DataFrame(data)
print(sum_first_three_values(df))

60


### Q4. Given a Pandas DataFrame df with a column 'Text', write a Python function to create a new column 'Word_Count' that contains the number of words in each row of the 'Text' column.

In [None]:
#A4.
import pandas as pd

def add_word_count_column(df):
    """Add a 'Word_Count' column holding the number of words in 'Text'.

    Words are whitespace-separated tokens, as produced by ``str.split()``.
    Missing entries (``None`` / ``NaN``) are reported as 0 words; without that
    guard ``str()`` would turn them into the text "None" / "nan" and each
    would be counted as one word.

    The column is added to the DataFrame that was passed in, and that same
    object is returned.
    """
    texts = df['Text']
    word_counts = texts.apply(lambda text: len(str(text).split()))
    # Overwrite the counts computed for missing cells with 0.
    df['Word_Count'] = word_counts.mask(texts.isna(), 0)
    return df

# Sample DataFrame
data = {'Text': ['Hello world', 'Pandas is great', 'Python programming']}
df = pd.DataFrame(data)
print(add_word_count_column(df))

### Q5. How are DataFrame.size() and DataFrame.shape() different?

A5. `DataFrame.size` returns the total number of elements in the DataFrame (rows × columns) as a single integer, while `DataFrame.shape` returns a tuple `(rows, columns)` giving its dimensions. Both are attributes rather than methods, so they are accessed without parentheses.

### Q6. Which function of pandas do we use to read an Excel file?

A6. The function used to read an Excel file is `pd.read_excel()`.

### Q7. You have a Pandas DataFrame df that contains a column named 'Email'. Write a Python function that creates a new column 'Username' in df that contains only the username part of each email address.

In [5]:
#A7.
import pandas as pd

def add_username_column(df):
    """Add a 'Username' column holding the part of 'Email' before the '@'.

    An address without an '@' is copied whole. Cells that are not strings
    (for example missing values) are copied through unchanged rather than
    raising ``AttributeError``.

    The column is added to the DataFrame that was passed in, and that same
    object is returned.
    """
    def _username(email):
        if not isinstance(email, str):
            return email
        # Only the text before the first '@' is needed, so split once.
        return email.split('@', 1)[0]

    df['Username'] = df['Email'].apply(_username)
    return df

# Sample DataFrame
data = {'Email': ['john.doe@example.com', 'jane.smith@domain.com']}
df = pd.DataFrame(data)
print(add_username_column(df))

                   Email    Username
0   john.doe@example.com    john.doe
1  jane.smith@domain.com  jane.smith


### Q8. You have a Pandas DataFrame df with columns 'A', 'B', and 'C'. Write a Python function that selects all rows where the value in column 'A' is greater than 5 and the value in column 'B' is less than 10.


In [4]:

#A8.
import pandas as pd

def filter_dataframe(df):
    """Return the rows of ``df`` where 'A' is above 5 and 'B' is below 10.

    Both comparisons are strict, and matching rows keep their original index
    labels.
    """
    a_above_five = df['A'] > 5
    b_below_ten = df['B'] < 10
    keep = a_above_five & b_below_ten
    return df[keep]


# Sample DataFrame
data = dict(
    A=[3, 8, 6, 2, 9],
    B=[5, 2, 9, 3, 1],
    C=[1, 7, 4, 5, 2],
)
df = pd.DataFrame(data)
print(filter_dataframe(df))

   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2



### Q9. Given a Pandas DataFrame df with a column 'Values', write a Python function to calculate the mean, median, and standard deviation of the values in the 'Values' column.


In [6]:

#A9.
import pandas as pd

def calculate_statistics(df):
    """Return ``(mean, median, standard deviation)`` of the 'Values' column.

    The three figures come from the Series methods ``mean()``, ``median()``
    and ``std()``, returned as a tuple in that order.
    """
    values = df['Values']
    return values.mean(), values.median(), values.std()


# Sample DataFrame
data = dict(Values=[10, 20, 30, 40, 50])
df = pd.DataFrame(data)
print(calculate_statistics(df))

(np.float64(30.0), np.float64(30.0), np.float64(15.811388300841896))


### Q10. Given a Pandas DataFrame df with a column 'Sales' and a column 'Date', write a Python function to create a new column 'MovingAverage' that contains the moving average of the sales for the past 7 days for each row.


In [7]:

#A10. 
import pandas as pd

def add_moving_average(df, window=7):
    """Add a 'MovingAverage' column with the rolling mean of 'Sales'.

    Parameters
    ----------
    df : DataFrame with a 'Sales' column.
    window : int, default 7
        Number of consecutive rows averaged for each output value.

    The window counts rows, not calendar days: the 'Date' column is not
    consulted, so the result is a 7-day average only when ``df`` holds one
    row per day in date order. Rows that do not yet have ``window`` values
    behind them get NaN.

    The column is added to the DataFrame that was passed in, and that same
    object is returned.
    """
    df['MovingAverage'] = df['Sales'].rolling(window=window).mean()
    return df

# Sample DataFrame
data = {
    'Date': pd.date_range(start='2023-01-01', periods=10, freq='D'),
    'Sales': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
}
df = pd.DataFrame(data)
print(add_moving_average(df))

        Date  Sales  MovingAverage
0 2023-01-01     10            NaN
1 2023-01-02     20            NaN
2 2023-01-03     30            NaN
3 2023-01-04     40            NaN
4 2023-01-05     50            NaN
5 2023-01-06     60            NaN
6 2023-01-07     70           40.0
7 2023-01-08     80           50.0
8 2023-01-09     90           60.0
9 2023-01-10    100           70.0



### Q11. You have a Pandas DataFrame df with a column 'Date'. Write a Python function that creates a new column 'Weekday' in the DataFrame.


In [8]:

#A11.
import pandas as pd

def add_weekday_column(df):
    """Add a 'Weekday' column with the day name of each 'Date' entry.

    'Date' is passed through ``pd.to_datetime`` first, so a column of date
    strings works as well as a datetime column. The 'Date' column itself is
    left as it was.

    The column is added to the DataFrame that was passed in, and that same
    object is returned.
    """
    # The .dt accessor only exists on datetime-like data, so coerce first.
    dates = pd.to_datetime(df['Date'])
    df['Weekday'] = dates.dt.day_name()
    return df

# Sample DataFrame
data = {'Date': pd.date_range(start='2023-01-01', periods=5, freq='D')}
df = pd.DataFrame(data)
print(add_weekday_column(df))

        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday



### Q12. Given a Pandas DataFrame df with a column 'Date' that contains timestamps, write a Python function to select all rows where the date is between '2023-01-01' and '2023-01-31'.


In [9]:

#A12.
import pandas as pd

def filter_date_range(df, start_date='2023-01-01', end_date='2023-01-31'):
    """Return the rows of ``df`` whose 'Date' falls within the given range.

    Parameters
    ----------
    df : DataFrame with a 'Date' column.
    start_date : lower bound, inclusive. Defaults to '2023-01-01'.
    end_date : last day of the range, inclusive. Defaults to '2023-01-31'.

    The upper bound is applied at day granularity: each timestamp is
    truncated to midnight before being compared with ``end_date``, so a row
    stamped later on the final day (e.g. 2023-01-31 18:45) is kept. Comparing
    the raw timestamp against '2023-01-31' would only keep midnight itself.

    Matching rows keep their original index labels.
    """
    dates = pd.to_datetime(df['Date'])
    on_or_after_start = dates >= start_date
    on_or_before_end = dates.dt.normalize() <= end_date
    return df[on_or_after_start & on_or_before_end]

# Sample DataFrame
data = {
    'Date': pd.date_range(start='2022-12-25', periods=40, freq='D'),
    'Value': range(40)
}
df = pd.DataFrame(data)
print(filter_date_range(df))

         Date  Value
7  2023-01-01      7
8  2023-01-02      8
9  2023-01-03      9
10 2023-01-04     10
11 2023-01-05     11
12 2023-01-06     12
13 2023-01-07     13
14 2023-01-08     14
15 2023-01-09     15
16 2023-01-10     16
17 2023-01-11     17
18 2023-01-12     18
19 2023-01-13     19
20 2023-01-14     20
21 2023-01-15     21
22 2023-01-16     22
23 2023-01-17     23
24 2023-01-18     24
25 2023-01-19     25
26 2023-01-20     26
27 2023-01-21     27
28 2023-01-22     28
29 2023-01-23     29
30 2023-01-24     30
31 2023-01-25     31
32 2023-01-26     32
33 2023-01-27     33
34 2023-01-28     34
35 2023-01-29     35
36 2023-01-30     36
37 2023-01-31     37



### Q13. To use the basic functions of pandas, what is the first and foremost necessary library that needs to be imported?

A13. The first and foremost necessary library that needs to be imported is `pandas`.

import pandas as pd