In [7]:
#Q1. List any five functions of the pandas library with execution.


'''
1. `fillna()`: This function is used to fill missing values in a DataFrame or Series with a specified value or using a filling
    strategy like forward fill or backward fill.
'''
import pandas as pd

# Create a DataFrame with missing values
data = {'Name': ['John', 'Emily', 'Daniel', 'Sophia', 'Alex'],
        'Age': [25, None, 35, None, 33]}
df = pd.DataFrame(data)

# Fill the missing values in 'Age' column with the mean age.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df['Age']: that in-place call operates on an
# intermediate Series (chained assignment), which pandas deprecates and which
# leaves df unchanged when Copy-on-Write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].mean())
print(df)




'''
2. `head()`: It is used to display the first few rows of a DataFrame. By default, it shows the first five rows.
'''
import pandas as pd

# Build a small sample frame of names and ages
data = {
    'Name': ['John', 'Emily', 'Daniel', 'Sophia', 'Alex'],
    'Age': [25, 30, 35, 28, 33],
}
df = pd.DataFrame(data)

# Take only the top three rows and show them
first_three = df.head(3)
print(first_three)

'''
3. `groupby()`: This function is used to group data based on one or more columns, allowing for aggregations and transformations on the grouped data.
'''
import pandas as pd

# Create a DataFrame
data = {'Name': ['John', 'Emily', 'Daniel', 'Sophia', 'Alex'],
        'Age': [25, 30, 35, 28, 33],
        'Gender': ['Male', 'Female', 'Male', 'Female', 'Male']}
df = pd.DataFrame(data)

# Group the data by 'Gender' and calculate the mean age of each group.
# numeric_only=True keeps the non-numeric 'Name' column out of the mean;
# without it older pandas emits a FutureWarning and pandas 2.0+ raises
# TypeError because strings cannot be averaged.
grouped_df = df.groupby('Gender').mean(numeric_only=True)
print(grouped_df)

'''
4. `sort_values()`: It is used to sort a DataFrame by one or more columns.
'''
import pandas as pd

# Build a small sample frame of names and ages
data = {
    'Name': ['John', 'Emily', 'Daniel', 'Sophia', 'Alex'],
    'Age': [25, 30, 35, 28, 33],
}
df = pd.DataFrame(data)

# Order the rows from oldest to youngest (original row labels are kept)
sorted_df = df.sort_values(by='Age', ascending=False)
print(sorted_df)

'''
5. `to_csv()`: This function is used to save a DataFrame to a CSV file.
'''
import pandas as pd

# Build a small sample frame of names and ages
data = {
    'Name': ['John', 'Emily', 'Daniel', 'Sophia', 'Alex'],
    'Age': [25, 30, 35, 28, 33],
}
df = pd.DataFrame(data)

# Write the frame to 'output.csv' in the working directory,
# leaving the row index out of the file
df.to_csv(path_or_buf='output.csv', index=False)

'''
In these examples, we used the functions `fillna()`, `head()`, `groupby()`, `sort_values()`, and `to_csv()` to fill missing values,
display the first rows of a DataFrame, group data, sort a DataFrame, and save a DataFrame to a CSV file, respectively.
'''
pass


     Name   Age
0    John  25.0
1   Emily  31.0
2  Daniel  35.0
3  Sophia  31.0
4    Alex  33.0
     Name  Age
0    John   25
1   Emily   30
2  Daniel   35
         Age
Gender      
Female  29.0
Male    31.0
     Name  Age
2  Daniel   35
4    Alex   33
1   Emily   30
3  Sophia   28
0    John   25


  grouped_df = df.groupby('Gender').mean()


In [15]:
#Q2. Given a Pandas DataFrame df with columns 'A', 'B', and 'C', write a Python function to re-index the
#DataFrame with a new index that starts from 1 and increments by 2 for each row.

import pandas as pd

def reindex_dataframe(df):
    """Return a new DataFrame labelled with the odd numbers 1, 3, 5, ...

    Args:
        df: DataFrame to relabel. It is not modified.

    Returns:
        A DataFrame with the same rows and columns as ``df`` whose index
        starts at 1 and increases by 2 for each row.
    """
    # range(1, 2 * n, 2) yields exactly n labels: 1, 3, ..., 2n - 1
    odd_labels = pd.RangeIndex(1, len(df) * 2, 2)
    return df.set_index(odd_labels)

df = pd.DataFrame({
    'A': [10, 20, 30, 40, 50],
    'B': [60, 70, 80, 90, 100],
    'C': [110, 120, 130, 140, 150],
})

df_reindexed = reindex_dataframe(df)
print(df_reindexed)




    A    B    C
1  10   60  110
3  20   70  120
5  30   80  130
7  40   90  140
9  50  100  150


In [12]:
#Q3. You have a Pandas DataFrame df with a column named 'Values'. Write a Python function that iterates over the DataFrame and 
#calculates the sum of the first three values in the 'Values' column. The function should print the sum to the console.

import pandas as pd

values = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})


def calculate_sum(df):
    """Print the sum of the first three entries of the 'Values' column.

    If the frame has fewer than three rows, the rows that exist are summed.

    Args:
        df: DataFrame with a numeric 'Values' column.
    """
    leading_three = df['Values'].iloc[:3]
    print("Sum of the first three values:", leading_three.sum())


calculate_sum(values)



Sum of the first three values: 60


In [14]:
#Q4. Given a Pandas DataFrame df with a column 'Text', write a Python function to create a new column 'Word_Count' that contains the number of words in each row of the 'Text' column.

import pandas as pd

def add_word_count(df):
    """Add a 'Word_Count' column holding the number of words in 'Text'.

    Words are whitespace-separated tokens, as produced by ``str.split()``
    with no arguments. Non-string values are converted with ``str`` before
    counting. Missing values (None/NaN) count as 0 words.

    Args:
        df: DataFrame with a 'Text' column. It is modified in place.

    Returns:
        The same DataFrame object, with the 'Word_Count' column added.
    """
    texts = df['Text']
    word_counts = texts.astype(str).str.split().str.len()
    # Missing entries would otherwise be stringified ("nan"/"None") and
    # counted as one word, so force them to 0; astype(int) keeps the column
    # integer-typed even when missing values were present.
    df['Word_Count'] = word_counts.where(texts.notna(), 0).astype(int)
    return df


df = pd.DataFrame({'Text': ['Hello, how are you?', 'I am doing well.', 'Python is fun!']})


df_with_word_count = add_word_count(df)
print(df_with_word_count)


                  Text  Word_Count
0  Hello, how are you?           4
1     I am doing well.           4
2       Python is fun!           3


In [16]:
#Q5. How are DataFrame.size() and DataFrame.shape() different?

'''
The `DataFrame.size` and `DataFrame.shape` are both attributes of a Pandas DataFrame, but they provide different information about 
the DataFrame.

1. `DataFrame.size` returns the total number of elements in the DataFrame, which is calculated by multiplying the number of rows 
   (`DataFrame.shape[0]`) by the number of columns (`DataFrame.shape[1]`). It represents the total number of cells in the DataFrame,
    including both filled and empty values.

2. `DataFrame.shape` returns a tuple containing the number of rows and columns in the DataFrame. The shape attribute is in the form
   `(rows, columns)`, where `rows` represents the number of rows and `columns` represents the number of columns in the DataFrame.

Here's an example to illustrate the difference:
'''

import pandas as pd

# A 3x3 frame: three columns of three integers each
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [7, 8, 9],
}
df = pd.DataFrame(data)

# size is the total cell count; shape is the (rows, columns) tuple
for label, value in (("DataFrame size:", df.size), ("DataFrame shape:", df.shape)):
    print(label, value)




DataFrame size: 9
DataFrame shape: (3, 3)


In [17]:
#Q6. Which function of pandas do we use to read an excel file?

'''
To read an Excel file in Pandas, you can use the `pandas.read_excel()` function. It allows you to read data from an Excel file and
create a DataFrame from it. 
Example:


import pandas as pd

# Read Excel file
df = pd.read_excel('path/to/file.xlsx')

# Print the DataFrame
print(df)


In this example, the `read_excel()` function is used to read the Excel file located at `'path/to/file.xlsx'`. The function reads the
data from the file and creates a DataFrame `df` from it. You can then perform various operations and analysis on the DataFrame as 
needed.

Make sure to provide the correct path to your Excel file in the `read_excel()` function call. If the Excel file is in the same directory
as your Python script or notebook, you can provide just the file name with its extension. If it is in a different directory, you need to
provide the full path to the file.
'''
pass

In [18]:
#Q7. You have a Pandas DataFrame df that contains a column named 'Email' that contains email addresses in the format 'username@domain.com'.
#Write a Python function that creates a new column 'Username' in df that contains only the username part of each email address.
#The username is the part of the email address that appears before the '@' symbol. For example, if the email address is 'john.doe@example.com',
#the 'Username' column should contain 'john.doe'. Your function should extract the username from each email address and store it in the new 
#'Username'column.

import pandas as pd

def extract_username(df):
    """Add a 'Username' column with the part of each email before '@'.

    Args:
        df: DataFrame with an 'Email' column of strings. It is modified
            in place.

    Returns:
        The same DataFrame object, with the 'Username' column added.
    """
    email_parts = df['Email'].str.split('@')
    # The first piece of each split is everything before the first '@'
    df['Username'] = email_parts.str[0]
    return df


df = pd.DataFrame({
    'Email': ['john.doe@example.com', 'jane.smith@example.com', 'bob.johnson@example.com'],
})


df_with_username = extract_username(df)
print(df_with_username)


                     Email     Username
0     john.doe@example.com     john.doe
1   jane.smith@example.com   jane.smith
2  bob.johnson@example.com  bob.johnson


In [19]:
'''
Q8. You have a Pandas DataFrame df with columns 'A', 'B', and 'C'. Write a Python function that selects
all rows where the value in column 'A' is greater than 5 and the value in column 'B' is less than 10. The
function should return a new DataFrame that contains only the selected rows.
For example, if df contains the following values:
A B C
0 3 5 1
1 8 2 7
2 6 9 4
3 2 3 5
4 9 1 2
'''

import pandas as pd

def select_rows(df):
    """Return the rows of ``df`` where 'A' > 5 and 'B' < 10.

    Args:
        df: DataFrame with numeric columns 'A' and 'B'.

    Returns:
        A DataFrame containing only the matching rows, with their original
        index labels.
    """
    a_above_five = df['A'] > 5
    b_below_ten = df['B'] < 10
    return df[a_above_five & b_below_ten]


df = pd.DataFrame({
    'A': [3, 8, 6, 2, 9],
    'B': [5, 2, 9, 3, 1],
    'C': [1, 7, 4, 5, 2],
})


selected_rows = select_rows(df)
print(selected_rows)


   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [20]:
#Q9. Given a Pandas DataFrame df with a column 'Values', write a Python function to calculate the mean, median, and standard deviation of the values in the 'Values' column.

import pandas as pd

def calculate_statistics(df):
    """Compute summary statistics of the 'Values' column.

    Args:
        df: DataFrame with a numeric 'Values' column.

    Returns:
        A ``(mean, median, std_dev)`` tuple, where ``std_dev`` is the value
        returned by ``Series.std()`` with its default arguments.
    """
    values = df['Values']
    return values.mean(), values.median(), values.std()

df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})

mean_value, median_value, std_dev_value = calculate_statistics(df)
print("Mean:", mean_value)
print("Median:", median_value)
print("Standard Deviation:", std_dev_value)


Mean: 30.0
Median: 30.0
Standard Deviation: 15.811388300841896


In [21]:
#Q10. Given a Pandas DataFrame df with a column 'Sales' and a column 'Date', write a Python function to create a new column 
#'MovingAverage' that contains the moving average of the sales for the past 7 days for each row in the DataFrame. The moving average 
#should be calculated using a window of size 7 and should include the current day.

import pandas as pd

def calculate_moving_average(df):
    """Add a 'MovingAverage' column: the trailing 7-row mean of 'Sales'.

    The window covers the current row and up to six rows before it. Rows
    near the top, which have fewer than seven rows available, average
    whatever rows exist (``min_periods=1``). The window is counted in rows,
    in the frame's current order.

    Args:
        df: DataFrame with a numeric 'Sales' column. It is modified in place.

    Returns:
        The same DataFrame object, with the 'MovingAverage' column added.
    """
    trailing_window = df['Sales'].rolling(window=7, min_periods=1)
    df['MovingAverage'] = trailing_window.mean()
    return df


df = pd.DataFrame({
    'Date': pd.date_range(start='2023-01-01', periods=10),
    'Sales': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
})


df_with_ma = calculate_moving_average(df)
print(df_with_ma)


        Date  Sales  MovingAverage
0 2023-01-01     10           10.0
1 2023-01-02     20           15.0
2 2023-01-03     30           20.0
3 2023-01-04     40           25.0
4 2023-01-05     50           30.0
5 2023-01-06     60           35.0
6 2023-01-07     70           40.0
7 2023-01-08     80           50.0
8 2023-01-09     90           60.0
9 2023-01-10    100           70.0


In [22]:
'''
Q11. You have a Pandas DataFrame df with a column 'Date'. Write a Python function that creates a new
column 'Weekday' in the DataFrame. The 'Weekday' column should contain the weekday name (e.g.
Monday, Tuesday) corresponding to each date in the 'Date' column.
For example, if df contains the following values:
Date
0 2023-01-01
1 2023-01-02
2 2023-01-03
3 2023-01-04
4 2023-01-05
'''

import pandas as pd

def add_weekday_column(df):
    """Add a 'Weekday' column with the day name of each entry in 'Date'.

    The 'Date' column may already be datetime-typed or may hold values that
    ``pd.to_datetime`` can parse (for example 'YYYY-MM-DD' strings). The
    'Date' column itself is left as it was.

    Args:
        df: DataFrame with a 'Date' column. It is modified in place.

    Returns:
        The same DataFrame object, with the 'Weekday' column added
        (e.g. 'Monday', 'Tuesday').
    """
    # The .dt accessor only exists on datetime-like Series, so coerce first;
    # this leaves an already-datetime column's values unchanged.
    dates = pd.to_datetime(df['Date'])
    df['Weekday'] = dates.dt.day_name()
    return df


df = pd.DataFrame({'Date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'])})


df_with_weekday = add_weekday_column(df)
print(df_with_weekday)


        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday


In [23]:
#Q12. Given a Pandas DataFrame df with a column 'Date' that contains timestamps, write a Python function to 
#select all rows where the date is between '2023-01-01' and '2023-01-31'.

import pandas as pd

def select_rows_between_dates(df):
    start_date = pd.to_datetime('2023-01-01')
    end_date = pd.to_datetime('2023-01-31')
    selected_df = df[df['Date'].between(start_date, end_date)]
    return selected_df


df = pd.DataFrame({'Date': pd.to_datetime(['2023-01-01', '2023-01-15', '2023-01-31', '2023-02-10'])})


selected_rows = select_rows_between_dates(df)
print(selected_rows)


        Date
0 2023-01-01
1 2023-01-15
2 2023-01-31


In [24]:
#Q13. To use the basic functions of pandas, what is the first and foremost necessary library that needs to be imported?

'''
The first and foremost necessary library that needs to be imported to use the basic functions of pandas is the `pandas` library 
itself. Pandas is a powerful data manipulation and analysis library in Python that provides data structures and functions for 
handling and analyzing structured data, such as DataFrames and Series.

To import the pandas library, you can use the following import statement:

import pandas as pd

By convention, the `pandas` library is commonly imported with the alias `pd`. This allows you to access the pandas functions and 
objects using the `pd` prefix.

Once you have imported the pandas library, you can start using its functions and objects to manipulate and analyze data using 
DataFrames and Series.
'''
pass