Q1. List any five functions of the pandas library with execution.

> read_csv(): Reads a CSV file into a DataFrame.

In [None]:
import pandas as pd
# Load 'data.csv' (a path relative to the current working directory) into a
# DataFrame and bind it to the notebook-level name `df`.
df = pd.read_csv('data.csv')
# Print the leading rows as a quick look at what was loaded.
print(df.head())

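> describe(): Generates descriptive statistics (count, mean, standard deviation, minimum, quartiles, and maximum) for the numeric columns by default.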

In [None]:
# Print the summary table for `df`, i.e. whatever frame the preceding cell
# left bound to that name.
print(df.describe())

> drop(): Drops specified labels from rows or columns.

In [None]:
# Remove the column labelled 'Column_to_drop' and rebind `df` to the result.
# NOTE(review): 'Column_to_drop' looks like a placeholder name; presumably it
# must be replaced with a column that exists in data.csv -- confirm.
df = df.drop(columns=['Column_to_drop'])
# Print the leading rows of the reduced frame.
print(df.head())

> groupby(): Groups DataFrame using a mapper or by a series of columns.

In [None]:
# Group the rows of `df` by the values of its 'Category' column and sum the
# remaining columns within each group.
# NOTE(review): the schema of `df` is not visible here; if it holds non-numeric
# columns, .sum() may concatenate or reject them depending on the pandas
# version -- consider .sum(numeric_only=True) once the schema is confirmed.
grouped_df = df.groupby('Category').sum()
print(grouped_df)

> merge(): Merges DataFrame objects by performing a database-style join operation.

In [None]:
# Two small frames that share a 'key' column; only keys 'A' and 'B' appear in
# both of them.
df1 = pd.DataFrame({'key': list('ABC'), 'value': [1, 2, 3]})
df2 = pd.DataFrame({'key': list('ABD'), 'value': [4, 5, 6]})

# Inner join on 'key' using the DataFrame method form of merge. Both inputs
# carry a 'value' column, so the result exposes them as 'value_x' (from df1)
# and 'value_y' (from df2).
merged_df = df1.merge(df2, how='inner', on='key')
print(merged_df)

Q2. Given a Pandas DataFrame df with columns 'A', 'B', and 'C', write a Python function to re-index the DataFrame with a new index that starts from 1 and increments by 2 for each row.

In [None]:
def reindex_dataframe(df):
    """Relabel the rows of ``df`` with the odd numbers 1, 3, 5, ...

    The index is overwritten on the DataFrame that was passed in (the caller's
    object is modified) and that same object is returned.
    """
    first_label = 1
    step = 2
    # One label per row: first_label, first_label + step, ...
    stop = first_label + step * len(df)
    df.index = range(first_label, stop, step)
    return df


# Demo: a three-row frame ends up with index labels 1, 3 and 5.
df = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6], C=[7, 8, 9]))
reindexed_df = reindex_dataframe(df)
print(reindexed_df)


Q3. You have a Pandas DataFrame df with a column named 'Values'. Write a Python function that iterates over the DataFrame and calculates the sum of the first three values in the 'Values' column. The function should print the sum to the console.

In [None]:
def sum_first_three_values(df):
    """Print the total of the first three entries of the 'Values' column.

    When the column holds fewer than three rows, the rows that exist are
    summed. Nothing is returned; the result only goes to standard output.
    """
    leading = df['Values'].head(3)
    total = leading.sum()
    print(f"The sum of the first three values is: {total}")


# Demo: 10 + 20 + 30 is reported; the trailing 40 and 50 are ignored.
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
sum_first_three_values(df)

Q4. Given a Pandas DataFrame df with a column 'Text', write a Python function to create a new column 'Word_Count' that contains the number of words in each row of the 'Text' column.

In [None]:
def add_word_count_column(df):
    """Add a 'Word_Count' column holding the number of words in 'Text'.

    Words are the whitespace-separated tokens of each string, so leading,
    trailing and repeated whitespace do not inflate the count. Entries that
    are missing (None/NaN) or are not strings count as 0 words instead of
    raising, which the previous per-row ``x.split()`` call did.

    The column is added to the DataFrame that was passed in, and that same
    object is returned.
    """
    # Vectorised string ops yield NaN for anything that is not a string ...
    token_counts = df['Text'].str.split().str.len()
    # ... which is mapped to 0 so the column stays integer-typed.
    df['Word_Count'] = token_counts.fillna(0).astype(int)
    return df


# Demo: the three sentences contain 2, 3 and 2 words respectively.
df = pd.DataFrame({'Text': ['Hello world', 'Pandas is great', 'Data Science']})
df = add_word_count_column(df)
print(df)

Q5. How are DataFrame.size and DataFrame.shape different?

> DataFrame.size: Returns the total number of elements (cells) in the DataFrame as a single integer, i.e. the number of rows multiplied by the number of columns.

In [None]:
# `df` is whatever frame the previous cell left behind; when the cells run in
# order that is the 3-row frame with 'Text' and 'Word_Count' columns.
size = df.size
print(f"Size of DataFrame: {size}")

> DataFrame.shape: Returns a tuple representing the dimensionality of the DataFrame, in the order (number of rows, number of columns).

In [None]:
# Read the shape tuple of the same `df` used in the size example and print it.
shape = df.shape
print(f"Shape of DataFrame: {shape}")

Q6. Which function of pandas do we use to read an Excel file?

> read_excel(): Reads an Excel file into a DataFrame.

In [None]:
# Load 'data.xlsx' (a path relative to the current working directory) into a
# DataFrame, replacing the frame previously bound to `df`.
df = pd.read_excel('data.xlsx')
# Print the leading rows as a quick look at what was loaded.
print(df.head())

Q7. You have a Pandas DataFrame df that contains a column named 'Email' that contains email addresses in the format 'username@domain.com'. Write a Python function that creates a new column 'Username' in df that contains only the username part of each email address.

In [None]:
def add_username_column(df):
    """Add a 'Username' column holding the part of 'Email' before the '@'.

    The address is split at its first '@'. A value without any '@' is copied
    unchanged, and a missing address (None/NaN) yields a missing username
    instead of raising, which the previous per-row ``x.split('@')`` call did.

    The column is added to the DataFrame that was passed in, and that same
    object is returned.
    """
    # n=1: only the first '@' matters, so stop splitting after it.
    pieces = df['Email'].str.split(pat='@', n=1)
    df['Username'] = pieces.str[0]
    return df


# Demo: both addresses share a domain; only the local parts are kept.
df = pd.DataFrame({'Email': ['john.doe@example.com', 'jane.doe@example.com']})
df = add_username_column(df)
print(df)

Q8. You have a Pandas DataFrame df with columns 'A', 'B', and 'C'. Write a Python function that selects all rows where the value in column 'A' is greater than 5 and the value in column 'B' is less than 10. The function should return a new DataFrame that contains only the selected rows.

In [None]:
def select_rows(df):
    """Return the rows of ``df`` where 'A' exceeds 5 and 'B' is below 10.

    Both comparisons are strict. The input is left untouched; the matching
    rows come back as a separate DataFrame that keeps their original index
    labels.
    """
    a_above_five = df['A'].gt(5)
    b_below_ten = df['B'].lt(10)
    return df.loc[a_above_five & b_below_ten]


# Demo: the rows at positions 1, 2 and 4 satisfy both conditions.
df = pd.DataFrame({'A': [3, 8, 6, 2, 9], 'B': [5, 2, 9, 3, 1], 'C': [1, 7, 4, 5, 2]})
selected_df = select_rows(df)
print(selected_df)


Q9. Given a Pandas DataFrame df with a column 'Values', write a Python function to calculate the mean, median, and standard deviation of the values in the 'Values' column.

In [None]:
def calculate_statistics(df):
    """Print the mean, median and standard deviation of the 'Values' column.

    Each statistic is written on its own line; nothing is returned. The
    standard deviation is whatever ``Series.std()`` yields with its default
    arguments.
    """
    column = df['Values']
    average = column.mean()
    middle = column.median()
    spread = column.std()

    # A single print call with a newline separator emits the three lines.
    print(
        f"Mean: {average}",
        f"Median: {middle}",
        f"Standard Deviation: {spread}",
        sep="\n",
    )


# Demo on five evenly spaced values.
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
calculate_statistics(df)

Q10. Given a Pandas DataFrame df with a column 'Sales' and a column 'Date', write a Python function to create a new column 'MovingAverage' that contains the moving average of the sales for the past 7 days for each row in the DataFrame. The moving average should be calculated using a window of size 7 and should include the current day.

In [None]:
def add_moving_average(df):
    """Add a 'MovingAverage' column: the mean of 'Sales' over 7 rows.

    The window covers the current row and the six rows before it, in the
    order the rows appear in ``df``; the 'Date' column is not consulted.
    Rows with fewer than six predecessors get NaN.

    The column is added to the DataFrame that was passed in, and that same
    object is returned.
    """
    window_size = 7
    trailing_sales = df['Sales'].rolling(window_size)
    df['MovingAverage'] = trailing_sales.mean()
    return df


# Demo: ten consecutive days, so only the last four rows have a full window.
df = pd.DataFrame({
    'Date': pd.date_range(start='2023-01-01', periods=10, freq='D'),
    'Sales': [100, 150, 200, 250, 300, 350, 400, 450, 500, 550]
})
df = add_moving_average(df)
print(df)

Q11. You have a Pandas DataFrame df with a column 'Date'. Write a Python function that creates a new column 'Weekday' in the DataFrame. The 'Weekday' column should contain the weekday name (e.g., Monday, Tuesday) corresponding to each date in the 'Date' column.

In [None]:
def add_weekday_column(df):
    """Add a 'Weekday' column with the day name of each value in 'Date'.

    'Date' may already hold datetimes or may hold date strings (for example
    straight from a CSV file); the values are parsed with ``pd.to_datetime``
    before the name is looked up, so string dates no longer fail on the
    ``.dt`` accessor. The 'Date' column itself is left as it was.

    The column is added to the DataFrame that was passed in, and that same
    object is returned.
    """
    parsed_dates = pd.to_datetime(df['Date'])
    df['Weekday'] = parsed_dates.dt.day_name()
    return df


# Demo: five consecutive days starting on 2023-01-01.
df = pd.DataFrame({
    'Date': pd.date_range(start='2023-01-01', periods=5, freq='D')
})
df = add_weekday_column(df)
print(df)

Q12. Given a Pandas DataFrame df with a column 'Date' that contains timestamps, write a Python function to select all rows where the date is between '2023-01-01' and '2023-01-31'.

In [None]:
def select_date_range(df, start='2023-01-01', end='2023-01-31'):
    """Return the rows of ``df`` whose 'Date' falls between two calendar days.

    Both ``start`` and ``end`` are treated as whole days and are inclusive.
    The upper bound is applied as "before midnight of the day after ``end``",
    so timestamps carrying a time of day on the last day are kept; comparing
    with ``<= end`` would silently drop everything after 00:00 on that day.

    Args:
        df: DataFrame with a 'Date' column of datetimes or date strings.
        start: First day to include; defaults to 1 January 2023.
        end: Last day to include; defaults to 31 January 2023.

    Returns:
        The matching rows, with their original index labels. ``df`` itself
        is not modified.
    """
    dates = pd.to_datetime(df['Date'])
    lower = pd.Timestamp(start).normalize()
    # Half-open interval [lower, upper) covers the whole of the `end` day.
    upper = pd.Timestamp(end).normalize() + pd.Timedelta(days=1)

    # A timezone-aware column cannot be compared with naive bounds, so naive
    # bounds are interpreted in the column's own timezone.
    tz = dates.dt.tz
    if tz is not None:
        if lower.tzinfo is None:
            lower = lower.tz_localize(tz)
        if upper.tzinfo is None:
            upper = upper.tz_localize(tz)

    mask = (dates >= lower) & (dates < upper)
    return df.loc[mask]


# Demo: 60 consecutive days from 2023-01-01; the 31 January rows are selected.
df = pd.DataFrame({
    'Date': pd.date_range(start='2023-01-01', periods=60, freq='D'),
    'Value': range(60)
})
selected_df = select_date_range(df)
print(selected_df)

Q13. To use the basic functions of pandas, what is the first and foremost necessary library that needs to be imported?

> The first and foremost necessary library to import is pandas.

In [None]:
import pandas as pd