In [2]:
#Q1. List any five functions of the pandas library with execution.

# Five commonly used pandas features: four methods (head, describe, groupby, value_counts) and the shape attribute.


import pandas as pd

# Sample DataFrame: one text column ('Name') and two numeric columns.
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David'],
        'Age': [25, 30, 22, 35],
        'Salary': [50000, 60000, 45000, 70000]}
df = pd.DataFrame(data)

# 1. head() - returns the first n rows of the DataFrame (default n=5)
print("Head Function:")
print(df.head())
print()

# 2. describe() - generates descriptive statistics for the numeric columns
print("Describe Function:")
print(df.describe())
print()

# 3. shape - an attribute (not a callable) holding the (rows, columns) tuple
print("Shape Function:")
print(df.shape)
print()

# 4. groupby() - groups rows by the values of one or more columns.
# numeric_only=True restricts the mean to numeric columns. Without it the
# text column 'Name' is included in the aggregation, which emits a
# FutureWarning on pandas 1.5 and raises TypeError on pandas 2.0+.
print("Groupby Function:")
grouped_data = df.groupby('Age').mean(numeric_only=True)
print(grouped_data)
print()

# 5. value_counts() - returns a Series with the count of each unique value
print("Value Counts Function:")
count_by_age = df['Age'].value_counts()
print(count_by_age)

Head Function:
      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   22   45000
3    David   35   70000

Describe Function:
             Age        Salary
count   4.000000      4.000000
mean   28.000000  56250.000000
std     5.715476  11086.778913
min    22.000000  45000.000000
25%    24.250000  48750.000000
50%    27.500000  55000.000000
75%    31.250000  62500.000000
max    35.000000  70000.000000

Shape Function:
(4, 3)

Groupby Function:
      Salary
Age         
22   45000.0
25   50000.0
30   60000.0
35   70000.0

Value Counts Function:
25    1
30    1
22    1
35    1
Name: Age, dtype: int64


  grouped_data = df.groupby('Age').mean()


In [3]:
#Q2. Python function to re-index the DataFrame:


def reindex_dataframe(df):
    """Return a copy of ``df`` whose row labels are the odd numbers 1, 3, 5, ...

    Args:
        df: DataFrame to relabel. It is not modified.

    Returns:
        A new DataFrame with the same rows and columns, indexed by the
        first ``len(df)`` odd integers.
    """
    # Row at position k receives label 2k + 1.
    odd_labels = [2 * position + 1 for position in range(len(df))]
    return df.set_index(pd.Index(odd_labels))

# Example usage: `df` is not created in this cell; it is whatever `df` the
# kernel currently holds (the Name/Age/Salary frame when cells run in order).
new_df = reindex_dataframe(df)
print(new_df)

      Name  Age  Salary
1    Alice   25   50000
3      Bob   30   60000
5  Charlie   22   45000
7    David   35   70000


In [4]:
#Q3. Python function to calculate the sum of the first three values:


def sum_of_first_three_values(df):
    """Print the sum of the first three entries of the 'Values' column.

    Args:
        df: DataFrame containing a 'Values' column.

    Returns:
        None. The result is written to stdout.

    Raises:
        KeyError: if ``df`` has no 'Values' column.
    """
    # .iloc slices strictly by position, so "first three" always means the
    # first three rows regardless of the labels the index carries. If the
    # column has fewer than three rows, whatever is present is summed.
    values_sum = df['Values'].iloc[:3].sum()
    print(f"Sum of the first three values: {values_sum}")

# Example usage: the first three entries are 10, 20 and 30.
# Note that this rebinds the notebook-level name `df`.
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
sum_of_first_three_values(df)

Sum of the first three values: 60


In [5]:
#Q4. Python function to create a new column 'Word_Count':


def add_word_count_column(df):
    """Add a 'Word_Count' column with the number of whitespace-separated words in 'Text'.

    Missing entries (None / NaN) count as 0 words. Any other non-string
    value is converted with ``str()`` before counting.

    Args:
        df: DataFrame containing a 'Text' column. The new column is added
            to this object in place.

    Returns:
        The same DataFrame object, now carrying 'Word_Count'.
    """
    def _count_words(value):
        # Without this guard str(NaN) == 'nan' would be counted as one word.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return 0
        return len(str(value).split())

    df['Word_Count'] = df['Text'].apply(_count_words)
    return df

# Example usage: three phrases containing two, three and two words.
# add_word_count_column adds the column to `df` itself and returns it, so
# `new_df` and `df` refer to the same object here.
df = pd.DataFrame({'Text': ['Hello world', 'Python is awesome', 'Data Science']})
new_df = add_word_count_column(df)
print(new_df)

                Text  Word_Count
0        Hello world           2
1  Python is awesome           3
2       Data Science           2


In [9]:
#Q5. How are DataFrame.size() and DataFrame.shape() different?

'''DataFrame.size is an attribute (not a method) that returns the total number of elements in the DataFrame (rows x columns).
DataFrame.shape is an attribute (not a method) that returns a tuple with the dimensions of the DataFrame (number of rows, number of columns).'''

'DataFrame.size is an attribute (not a method) that returns the total number of elements in the DataFrame (rows x columns).\nDataFrame.shape is an attribute (not a method) that returns a tuple with the dimensions of the DataFrame (number of rows, number of columns).'

In [11]:
#Q6. Function to read an Excel file:

def read_excel_file(file_path):
    """Load an Excel file into a DataFrame using ``pd.read_excel`` with its default options.

    Args:
        file_path: Location of the Excel file, passed straight to ``pd.read_excel``.

    Returns:
        The DataFrame produced by ``pd.read_excel``.
    """
    return pd.read_excel(file_path)

# Example usage:
# 'path/to/your/file.xlsx' is a placeholder; point it at a real workbook.
import os

excel_file_path = 'path/to/your/file.xlsx'
# Only attempt the read when the file is present, so that running the whole
# notebook does not stop at this cell with FileNotFoundError.
if os.path.exists(excel_file_path):
    df = read_excel_file(excel_file_path)
else:
    print(f"Excel file not found, skipping read: {excel_file_path}")


In [12]:
#Q7. Function to extract usernames from email addresses:


def extract_usernames(df):
    """Add a 'Username' column holding the part of each 'Email' before the first '@'.

    Missing e-mail entries (None / NaN) yield a missing username instead of
    raising. A value without '@' is returned unchanged.

    Args:
        df: DataFrame containing an 'Email' column of strings. The new
            column is added to this object in place.

    Returns:
        The same DataFrame object, now carrying 'Username'.
    """
    # The vectorized .str accessor propagates missing values; n=1 stops
    # after the first '@' because only the leading piece is needed.
    df['Username'] = df['Email'].str.split('@', n=1).str[0]
    return df

# Example usage: two addresses on the same domain; the expected usernames
# are the text before the '@'.
df = pd.DataFrame({'Email': ['john.doe@example.com', 'alice.smith@example.com']})
new_df = extract_usernames(df)
print(new_df)

                     Email     Username
0     john.doe@example.com     john.doe
1  alice.smith@example.com  alice.smith


In [13]:
# Q8. Function to select rows based on conditions:


def select_rows(df):
    """Return the rows of ``df`` where column 'A' exceeds 5 and column 'B' is below 10.

    Args:
        df: DataFrame containing numeric columns 'A' and 'B'.

    Returns:
        The matching rows, with their original index labels.
    """
    a_is_large = df['A'].gt(5)
    b_is_small = df['B'].lt(10)
    return df[a_is_large & b_is_small]

# Example usage: every B value is below 10, so the result is decided by
# A > 5, which holds for the rows labelled 1, 2 and 4.
df = pd.DataFrame({'A': [3, 8, 6, 2, 9],
                   'B': [5, 2, 9, 3, 1],
                   'C': [1, 7, 4, 5, 2]})
selected_df = select_rows(df)
print(selected_df)

   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [14]:
#Q9. Function to calculate mean, median, and standard deviation:


def calculate_stats(df):
    """Print the mean, median and standard deviation of the 'Values' column.

    The standard deviation comes from ``Series.std()`` with its default
    arguments.

    Args:
        df: DataFrame containing a numeric 'Values' column.

    Returns:
        None. The three statistics are written to stdout on one line.
    """
    values = df['Values']
    mean_value, median_value, std_deviation = values.mean(), values.median(), values.std()
    print(f"Mean: {mean_value}, Median: {median_value}, Standard Deviation: {std_deviation}")

# Example usage: five evenly spaced values, so mean and median are both 30.
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
calculate_stats(df)

Mean: 30.0, Median: 30.0, Standard Deviation: 15.811388300841896


In [15]:
# Q10. Function to create a MovingAverage column:


def moving_average(df, window=7, column='Sales'):
    """Add a 'MovingAverage' column with the trailing rolling mean of one column.

    Args:
        df: DataFrame containing ``column``. The new column is added to
            this object in place.
        window: Number of consecutive rows in each averaging window.
            Defaults to 7.
        column: Name of the column to average. Defaults to 'Sales'.

    Returns:
        The same DataFrame object, now carrying 'MovingAverage'.
    """
    # min_periods=1 makes the leading rows average however many values are
    # available so far instead of producing NaN until the window is full.
    df['MovingAverage'] = df[column].rolling(window=window, min_periods=1).mean()
    return df

# Example usage: eight data points, so only the last two rows have a full
# seven-value window behind them.
df = pd.DataFrame({'Sales': [10, 15, 20, 25, 30, 35, 40, 45]})
new_df = moving_average(df)
print(new_df)

   Sales  MovingAverage
0     10           10.0
1     15           12.5
2     20           15.0
3     25           17.5
4     30           20.0
5     35           22.5
6     40           25.0
7     45           30.0


In [16]:
#Q11. Function to create a 'Weekday' column:

def add_weekday_column(df):
    """Add a 'Weekday' column with the day name of each entry in 'Date'.

    Args:
        df: DataFrame containing a 'Date' column that ``pd.to_datetime``
            can parse. The new column is added to this object in place;
            'Date' itself is left as it was.

    Returns:
        The same DataFrame object, now carrying 'Weekday'.
    """
    parsed_dates = pd.to_datetime(df['Date'])
    df['Weekday'] = parsed_dates.dt.day_name()
    return df

# Example usage: five consecutive days given as ISO-formatted strings.
df = pd.DataFrame({'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']})
new_df = add_weekday_column(df)
print(new_df)


         Date    Weekday
0  2023-01-01     Sunday
1  2023-01-02     Monday
2  2023-01-03    Tuesday
3  2023-01-04  Wednesday
4  2023-01-05   Thursday


In [17]:
#Q12. Function to select rows between two dates:

def select_rows_between_dates(df, start_date, end_date):
    """Return the rows whose 'Date' lies between two dates, both ends inclusive.

    Args:
        df: DataFrame containing a 'Date' column that ``pd.to_datetime``
            can parse. It is not modified.
        start_date: Lower bound (inclusive); anything ``pd.to_datetime``
            accepts, e.g. an ISO date string.
        end_date: Upper bound (inclusive); same accepted forms.

    Returns:
        A new DataFrame with the matching rows and their original index
        labels. Its 'Date' column holds parsed datetimes.
    """
    # assign() builds a new frame, so the caller's 'Date' column keeps its
    # original values instead of being overwritten as a side effect.
    converted = df.assign(Date=pd.to_datetime(df['Date']))
    lower_bound = pd.to_datetime(start_date)
    upper_bound = pd.to_datetime(end_date)
    # Series.between includes both endpoints by default.
    in_range = converted['Date'].between(lower_bound, upper_bound)
    return converted[in_range]

# Example usage: keep the rows dated in January 2023; the bounds are passed
# as ISO date strings.
df = pd.DataFrame({'Date': ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'],
                   'Values': [10, 20, 30, 40]})
start_date = '2023-01-01'
end_date = '2023-01-31'
selected_df = select_rows_between_dates(df, start_date, end_date)
print(selected_df)

        Date  Values
0 2023-01-01      10
1 2023-01-15      20


In [18]:
#Q13. Importing necessary library for basic pandas functions:


import pandas as pd