In [None]:
# ans 1

read_csv: Reads a comma-separated values (CSV) file into a DataFrame.
head: Returns the first n rows of a DataFrame.
describe: Generates descriptive statistics that summarize the central tendency, dispersion, and shape of a dataset’s distribution, excluding NaN values.
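groupby: Splits a DataFrame into groups based on one or more keys (column names, a mapper, or a Series) so that an aggregation or transformation can be applied to each group.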
merge: Merges DataFrame objects by performing a database-style join operation.

In [1]:
#ans 2

import pandas as pd

def reindex_dataframe(df):
    """Relabel the rows of ``df`` with the odd integers 1, 3, 5, ...

    The index is assigned on the frame that was passed in, and that same
    object is returned.
    """
    row_count = len(df)

    # One odd label per row: begin at 1, advance by 2, stop just past the last label
    df.index = range(1, row_count * 2 + 1, 2)
    return df

# Example usage: a three-row frame with columns A, B and C
data = dict(A=[10, 20, 30], B=[40, 50, 60], C=[70, 80, 90])
df = pd.DataFrame(data)

# Swap the row labels for 1, 3, 5 and show the result
reindexed_df = reindex_dataframe(df)
print(reindexed_df)


    A   B   C
1  10  40  70
3  20  50  80
5  30  60  90


In [2]:
#ans 3
import pandas as pd

def sum_first_three_values(df):
    """Print the sum of the first three entries of the 'Values' column.

    When the column holds fewer than three entries a notice is printed
    instead. Nothing is returned in either case.
    """
    values = df['Values']

    if len(values) >= 3:
        # Positional slice, so the first three rows are used whatever the index labels are
        total = values.iloc[:3].sum()
        print("Sum of the first three values in the 'Values' column:", total)
    else:
        print("The 'Values' column has less than three elements.")

# Example usage: five values, so the three-element check passes
data = dict(Values=[10, 20, 30, 40, 50])
df = pd.DataFrame(data)

# Prints the total of 10, 20 and 30
sum_first_three_values(df)



Sum of the first three values in the 'Values' column: 60


In [3]:
#ans 4 

import pandas as pd

def add_word_count_column(df):
    """Add a 'Word_Count' column with the number of words in each 'Text' entry.

    Words are the whitespace-separated pieces of the entry. Non-string
    values are converted with ``str()`` before counting. Missing entries
    (``None``, ``NaN``, ``pd.NA``) count as zero words.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Text' column. The new column is added to this frame
        in place.

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in, now carrying 'Word_Count'.

    Raises
    ------
    KeyError
        If ``df`` has no 'Text' column.
    """
    def _count_words(value):
        # A missing value has no words. Without this guard str(NaN) == 'nan'
        # (or 'None') would be split into one bogus word.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return 0
        return len(str(value).split())

    df['Word_Count'] = df['Text'].apply(_count_words)
    return df


In [4]:
# Three short phrases to count words in
df = pd.DataFrame(dict(Text=['Hello world', 'Pandas is great', 'Python programming']))
# Attach the 'Word_Count' column and show the frame
df = add_word_count_column(df)
print(df)


                 Text  Word_Count
0         Hello world           2
1     Pandas is great           3
2  Python programming           2


In [None]:
#ans 5

DataFrame.size:

Definition: Returns the total number of elements in the DataFrame.
Calculation: It is the product of the number of rows and the number of columns.

DataFrame.shape:

Definition: Returns a tuple representing the dimensions of the DataFrame.
Calculation: It provides the number of rows and columns as a tuple (number_of_rows, number_of_columns). For example, a DataFrame with 3 rows and 2 columns has shape (3, 2) and size 6.


In [None]:
#ans 6

To read an Excel file into a Pandas DataFrame, you use the pandas.read_excel() function.

In [5]:
#ans 7

import pandas as pd

def extract_username(df):
    """Add a 'Username' column holding the part of each 'Email' before the first '@'.

    The column is added to the frame that was passed in, and that same
    frame is returned.
    """
    # Break every address at '@'; the first piece is the user name
    email_parts = df['Email'].str.split('@')
    df['Username'] = email_parts.str.get(0)
    return df


In [6]:
# Sample addresses to split into user names
df = pd.DataFrame(dict(
    Email=['john.doe@example.com', 'jane.smith@domain.com', 'user123@website.org'],
))

# Derive the 'Username' column
df = extract_username(df)

print(df)


                   Email    Username
0   john.doe@example.com    john.doe
1  jane.smith@domain.com  jane.smith
2    user123@website.org     user123


In [7]:
# ans 8

import pandas as pd

def filter_rows(df):
    """Return the rows of ``df`` where 'A' is greater than 5 and 'B' is less than 10."""
    a_above_five = df['A'].gt(5)
    b_below_ten = df['B'].lt(10)

    # A row is kept only when both conditions hold
    return df[a_above_five & b_below_ten]


In [8]:
# Sample data: rows 1, 2 and 4 have A > 5, and every B is below 10
df = pd.DataFrame(dict(
    A=[3, 8, 6, 2, 9],
    B=[5, 2, 9, 3, 1],
    C=[1, 7, 4, 5, 2],
))

# Keep only the rows that satisfy both conditions
filtered_df = filter_rows(df)

print(filtered_df)


   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [9]:
# ans 9

import pandas as pd

def calculate_statistics(df):
    """Summarise the 'Values' column as a dict.

    The returned dict has the keys 'Mean', 'Median' and
    'Standard Deviation', in that order. The standard deviation comes from
    ``Series.std()`` with its default arguments.
    """
    values = df['Values']

    return {
        'Mean': values.mean(),
        'Median': values.median(),
        'Standard Deviation': values.std(),
    }


In [10]:
# ans 10

import pandas as pd

def add_moving_average(df, window=7):
    """Return a date-sorted copy of ``df`` with a trailing moving average of 'Sales'.

    The 'Date' column is parsed with ``pd.to_datetime`` and the rows are
    sorted by it. 'MovingAverage' is the mean of the current row and up to
    ``window - 1`` preceding rows. Because ``min_periods=1`` is used, the
    first rows average over however many rows are available rather than
    producing NaN.

    The window counts rows, not calendar days, so it is a true 7-day
    average only when the data has exactly one row per day.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Date' and 'Sales' columns. It is left unmodified.
    window : int, default 7
        Number of rows in the rolling window.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by 'Date' (original index labels preserved)
        with 'Date' as datetimes and an added 'MovingAverage' column.
    """
    # assign() works on a copy, so the caller's 'Date' column keeps its original values
    result = df.assign(Date=pd.to_datetime(df['Date'])).sort_values(by='Date')

    result['MovingAverage'] = result['Sales'].rolling(window=window, min_periods=1).mean()

    return result


In [11]:
# Sample data: ten consecutive days (2024-07-01 .. 2024-07-10), sales rising by 100 per day
df = pd.DataFrame({
    'Date': [f'2024-07-{day:02d}' for day in range(1, 11)],
    'Sales': list(range(100, 1001, 100)),
})

# Add the 'MovingAverage' column
df = add_moving_average(df)

print(df)


        Date  Sales  MovingAverage
0 2024-07-01    100          100.0
1 2024-07-02    200          150.0
2 2024-07-03    300          200.0
3 2024-07-04    400          250.0
4 2024-07-05    500          300.0
5 2024-07-06    600          350.0
6 2024-07-07    700          400.0
7 2024-07-08    800          500.0
8 2024-07-09    900          600.0
9 2024-07-10   1000          700.0


In [12]:
# ans 11

import pandas as pd

def add_weekday_column(df):
    """Convert 'Date' to datetimes and add a 'Weekday' column with each date's day name.

    Both columns are written to the frame that was passed in, and that
    same frame is returned.
    """
    parsed_dates = pd.to_datetime(df['Date'])

    # Store the parsed dates back, then derive the day names from them
    df['Date'] = parsed_dates
    df['Weekday'] = parsed_dates.dt.day_name()

    return df


In [13]:
# Sample data: the first five days of January 2023, given as strings
df = pd.DataFrame(dict(
    Date=['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'],
))

# Add the 'Weekday' column
df = add_weekday_column(df)

print(df)


        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday


In [14]:
#ans 12

import pandas as pd

def filter_dates(df, start_date='2023-01-01', end_date='2023-01-31'):
    """Return the rows of ``df`` whose 'Date' lies between two bounds, inclusive.

    The 'Date' column is parsed with ``pd.to_datetime`` before comparing.
    With the default bounds this keeps dates from 2023-01-01 through
    2023-01-31.

    NOTE: the bounds are compared exactly as given. A date-only end bound
    is treated by pandas as midnight of that day, so entries with a later
    time on the end date fall outside the range.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Date' column. It is left unmodified.
    start_date : str or datetime-like, default '2023-01-01'
        Lower bound (inclusive).
    end_date : str or datetime-like, default '2023-01-31'
        Upper bound (inclusive).

    Returns
    -------
    pandas.DataFrame
        A new frame with the matching rows (original index labels kept)
        and 'Date' as datetimes.
    """
    # assign() works on a copy, so the caller's 'Date' column is not overwritten
    parsed = df.assign(Date=pd.to_datetime(df['Date']))

    in_range = (parsed['Date'] >= start_date) & (parsed['Date'] <= end_date)

    return parsed[in_range]


In [15]:
# Sample data: the entry at index 2 (2023-02-01) is the only one outside January 2023
df = pd.DataFrame(dict(
    Date=['2023-01-01', '2023-01-15', '2023-02-01', '2023-01-30', '2023-01-31'],
))

# Keep only the January 2023 rows
filtered_df = filter_dates(df)

print(filtered_df)


        Date
0 2023-01-01
1 2023-01-15
3 2023-01-30
4 2023-01-31


In [None]:
#ans 13

import pandas as pd
