## Q1

In [None]:
Pandas is a popular Python library for data manipulation and analysis. Here are five commonly used
pandas functions, each with a short usage example:

1. read_csv(): Used to read data from a CSV file into a DataFrame.

import pandas as pd
# Read data from a CSV file into a DataFrame
df = pd.read_csv('data.csv')


2. head(): Displays the first n rows of a DataFrame. By default, it shows the first 5 rows.

# Display the first 5 rows of the DataFrame
df.head()


3. info(): Prints a concise summary of the DataFrame, including the index range, each column's data type and non-null count, and memory usage.

# Display summary information about the DataFrame
df.info()


4. describe(): Generates descriptive statistics for the DataFrame (numeric columns by default): count, mean,
standard deviation, minimum, the 25%/50%/75% quartiles, and maximum.

# Display descriptive statistics of the DataFrame
df.describe()


5. groupby(): Used to group data based on one or more columns and perform aggregation operations.

# Group data by a column and calculate the mean for each group
grouped = df.groupby('Category')['Value'].mean()


## Q2

In [None]:
import pandas as pd

def reindex_with_increment(df, start=1, step=2):
    """Return ``df`` re-labelled with an evenly spaced integer index.

    With the defaults the new row labels are 1, 3, 5, ... (one per row).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose rows should receive new labels. It is not modified.
    start : int, default 1
        Label given to the first row.
    step : int, default 2
        Difference between consecutive labels; must be non-zero.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with the same rows and columns and the new index.

    Raises
    ------
    ValueError
        If ``step`` is zero.
    """
    # The stop bound is exclusive, so start + step * len(df) yields exactly
    # len(df) labels (and an empty index for an empty frame).
    new_index = pd.RangeIndex(start=start, stop=start + step * len(df), step=step)

    # set_index returns a new frame, leaving the caller's object untouched.
    return df.set_index(new_index)

# Sample DataFrame 'df' with columns 'A', 'B', and 'C' (four rows each);
# pd.DataFrame gives it the default 0-based integer index.
data = {'A': [10, 20, 30, 40],
        'B': [50, 60, 70, 80],
        'C': [90, 100, 110, 120]}
df = pd.DataFrame(data)

# Call the function to re-index the DataFrame; rebinding 'df' replaces the
# original frame with the re-indexed one returned by the function.
df = reindex_with_increment(df)

# Print the re-indexed DataFrame
print(df)


## Q3

In [None]:
import pandas as pd

def calculate_sum_of_first_three_values(df):
    """Print the sum of the first three entries of the 'Values' column.

    If the column holds fewer than three entries, a notice is printed
    instead. The result is only printed; the function returns None.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Values' column.
    """
    column = df['Values']

    # Guard clause: bail out early when there is not enough data to sum.
    if len(column) < 3:
        print("The 'Values' column does not have at least three values.")
        return

    total = column.head(3).sum()
    print("Sum of the first three values:", total)
# Demo input: a five-row 'Values' column, so the "at least three values"
# branch of the function is the one exercised here.
data = {'Values': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)
# The function reports its result via print; it has no return value to capture.
calculate_sum_of_first_three_values(df)


## Q4

In [None]:
import pandas as pd
def add_word_count_column(df):
    """Add a 'Word_Count' column holding the number of words in 'Text'.

    Words are the whitespace-separated tokens of each string, so an empty
    string counts as 0 words. Missing entries (None/NaN) do not raise;
    they produce a missing count, which makes the column float-typed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a string 'Text' column. It is modified in place.

    Returns
    -------
    None
    """
    # The vectorized .str accessor propagates missing values, whereas a
    # per-element text.split() raises AttributeError on None/NaN.
    df['Word_Count'] = df['Text'].str.split().str.len()
# Demo input: three short sentences in a 'Text' column.
data = {'Text': ["This is a sample sentence.", "Pandas is a powerful library.", "Word count in Python is fun!"]}
df = pd.DataFrame(data)
# The function adds 'Word_Count' to df in place, so no result is assigned.
add_word_count_column(df)
print(df)


## Q5

In [None]:
1. DataFrame.size:

i.  DataFrame.size returns the total number of elements in the DataFrame, which is equivalent to the product 
    of the number of rows and the number of columns.
ii. It provides a single integer value representing the total size of the DataFrame.

Example:-
import pandas as pd
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)
size = df.size  # size = 6 (3 rows * 2 columns)



2. DataFrame.shape:

i.  DataFrame.shape returns a tuple representing the dimensions of the DataFrame. The tuple has two elements:
    the number of rows and the number of columns, respectively.
ii. Unlike size, it reports the row count and column count separately; the two are related by size == shape[0] * shape[1].

Example:- 
import pandas as pd
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)
shape = df.shape  # shape = (3, 2) (3 rows and 2 columns)


## Q6

In [None]:
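To read an Excel file in Pandas, we can use the read_excel() function, e.g. df = pd.read_excel('data.xlsx'). Reading .xlsx files requires an Excel engine such as openpyxl to be installed.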

## Q7

In [None]:
import pandas as pd

def extract_username_from_email(df):
    """Add a 'Username' column holding the part of 'Email' before the first '@'.

    A value without an '@' is copied unchanged. Missing entries (None/NaN)
    do not raise; they produce a missing username.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a string 'Email' column. It is modified in place.

    Returns
    -------
    None
    """
    # The vectorized .str accessor propagates missing values, whereas a
    # per-element email.split('@') raises AttributeError on None/NaN.
    df['Username'] = df['Email'].str.split('@').str[0]
# Demo input: three addresses in an 'Email' column.
data = {'Email': ['john.doe@example.com', 'jane.smith@example.com', 'bob.johnson@example.com']}
df = pd.DataFrame(data)
# The function adds 'Username' to df in place, so no result is assigned.
extract_username_from_email(df)
print(df)



## Q8

In [None]:
import pandas as pd

def select_rows(df):
    """Return the rows where column 'A' exceeds 5 and column 'B' is below 10.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing comparable 'A' and 'B' columns.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels and all columns.
    """
    a_over_five = df['A'].gt(5)
    b_under_ten = df['B'].lt(10)
    # Both bounds are strict: a row with A == 5 or B == 10 is excluded.
    return df.loc[a_over_five & b_under_ten]
# Demo input with three numeric columns; the filter only inspects 'A' and 'B'.
data = {'A': [2, 7, 8, 3, 6],
        'B': [4, 5, 9, 12, 2],
        'C': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)
# Keep the filtered result under its own name so 'df' still holds every row.
selected_df = select_rows(df)
print(selected_df)


## Q9

In [None]:
import pandas as pd

def calculate_stats(df):
    """Compute the mean, median and standard deviation of the 'Values' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a numeric 'Values' column.

    Returns
    -------
    tuple
        ``(mean, median, standard_deviation)``. The standard deviation is
        whatever ``Series.std()`` yields with its default arguments.
    """
    values = df['Values']
    return values.mean(), values.median(), values.std()
# Demo input: five values in a 'Values' column.
data = {'Values': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)
# Unpack the (mean, median, standard deviation) tuple returned by calculate_stats.
mean, median, std = calculate_stats(df)
print(f"Mean: {mean}")
print(f"Median: {median}")
print(f"Standard Deviation: {std}")


## Q10

In [None]:
import pandas as pd

def calculate_moving_average(df, window_size=7):
    """Return ``df`` sorted by 'Date' with a trailing moving average of 'Sales'.

    The window spans ``window_size`` rows (not calendar days). Because
    ``min_periods=1``, the first rows are averaged over however many rows
    are available instead of being left empty.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'Date' and 'Sales' columns. It is not modified.
    window_size : int, default 7
        Number of consecutive rows in each averaging window.

    Returns
    -------
    pandas.DataFrame
        A new, date-sorted frame with an added 'MovingAverage' column.
    """
    ordered = df.sort_values('Date')
    rolling_mean = ordered['Sales'].rolling(window=window_size, min_periods=1).mean()
    return ordered.assign(MovingAverage=rolling_mean)
# Demo input: nine consecutive daily dates starting 2023-10-01, with one
# sales figure per date.
data = {'Sales': [10, 15, 20, 25, 30, 35, 40, 45, 50],
        'Date': pd.date_range(start='2023-10-01', periods=9, freq='D')}
df = pd.DataFrame(data)
# Rebind 'df' to the returned frame, which carries the 'MovingAverage' column
# (default window_size of 7 is used).
df = calculate_moving_average(df)
print(df)


## Q11

In [None]:
import pandas as pd

def add_weekday_column(df):
    """Convert 'Date' to datetimes and add a 'Weekday' column of day names.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Date' column that ``pd.to_datetime`` can parse.
        It is modified in place: 'Date' is overwritten with the parsed
        values and 'Weekday' is added.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    parsed_dates = pd.to_datetime(df['Date'])
    df['Date'] = parsed_dates
    # day_name() gives the full weekday name for each date.
    df['Weekday'] = parsed_dates.dt.day_name()
    return df
# Demo input: 'Date' holds date strings rather than datetime values.
data = {'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']}
df = pd.DataFrame(data)
# add_weekday_column converts 'Date', adds 'Weekday', and returns the frame.
df = add_weekday_column(df)
print(df)


## Q12

In [None]:
import pandas as pd

def select_rows_in_date_range(df, start_date, end_date):
    """Return the rows whose 'Date' lies between two dates, bounds included.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Date' column that ``pd.to_datetime`` can parse.
        It is not modified.
    start_date, end_date : str or datetime-like
        Inclusive lower and upper bounds, parsed with ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the matching rows, their original index labels,
        and 'Date' converted to datetime values.
    """
    # Convert on a separate Series so the caller's 'Date' column is not
    # silently overwritten as a side effect of what is a read-only query.
    dates = pd.to_datetime(df['Date'])

    # Parse the bounds once, up front, rather than relying on implicit
    # string-to-datetime coercion inside each comparison.
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)
    in_range = (dates >= start) & (dates <= end)

    # assign() builds a new frame, so the result carries datetime 'Date'
    # values while the input keeps whatever it held before.
    return df.assign(Date=dates)[in_range]
# Demo input: date strings that are not in chronological order.
data = {'Date': ['2023-01-05', '2023-01-15', '2023-02-10', '2023-01-25', '2023-03-01']}
df = pd.DataFrame(data)
# Bounds of the range to select, given as date strings; the function compares
# with >= and <=, so both ends are included.
start_date = '2023-01-01'
end_date = '2023-01-31'
selected_df = select_rows_in_date_range(df, start_date, end_date)

print(selected_df)


## Q13

In [None]:
import pandas as pd