In [None]:
'''Q1. List any five functions of the pandas library with execution.
Ans-Pandas DataFrame provides a wide range of functions to manipulate data. Some of the common functions are:

head() and tail() - To view the first or last few rows of the DataFrame. These functions can be useful when we want to have a quick glance at the DataFrame.

info() and describe() - info() provides the summary of the DataFrame including the data types of each column and the number of non-null values. describe() provides the statistical summary of the numerical columns of the DataFrame.

sort_values() - To sort the rows of a DataFrame based on one or more columns. This function can be useful when we want to find the top or bottom values in a DataFrame based on a particular column.

groupby() and agg() - groupby() is used to group the rows of a DataFrame based on one or more columns. agg() is used to perform aggregations on the grouped data. These functions can be useful when we want to calculate summary statistics for different groups in the DataFrame.

drop() and fillna() - drop() is used to drop rows or columns from the DataFrame. fillna() is used to fill missing values in the DataFrame. These functions can be useful when we want to clean the DataFrame before analysis.

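apply() - To apply a function along an axis of a DataFrame, i.e. to each column (the default) or to each row; when called on a single column (a Series) the function is applied to each value. This function can be useful when we want to perform some custom operation on the DataFrame.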

'''
import pandas as pd

# Create a sample DataFrame. The last salary is deliberately missing so that
# the fillna() example at the end of this cell has a value to fill.
data = {'name': ['Alice', 'Bob', 'Charlie', 'David', 'Emily'],
        'age': [25, 30, 25, 40, 45],
        'gender': ['F', 'M', 'M', 'M', 'F'],
        'salary': [50000, 60000, 70000, 80000, None]
        }


df = pd.DataFrame(data)

# head(): view the first few rows of the DataFrame
print(df.head())

# tail(): view the last few rows of the DataFrame
print(df.tail())

# info(): summary of column dtypes and non-null counts.
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line.
df.info()

# describe(): statistical summary of the numerical columns
print(df.describe())

# sort_values(): sort the DataFrame by age in descending order
df_sorted = df.sort_values('age', ascending=False)
print(df_sorted)

# groupby() + agg(): average salary for each gender
grouped_df = df.groupby('gender').agg({'salary': 'mean'})
print(grouped_df)

# Same aggregation, grouped by age instead (this rebinds grouped_df)
grouped_df = df.groupby('age').agg({'salary': 'mean'})
print(grouped_df)

# drop(): remove the age column; axis=1 selects columns rather than rows
df_dropped = df.drop('age', axis=1)
print(df_dropped)

# fillna(): replace the missing salary with the mean of the known salaries
mean_salary = df['salary'].mean()
df_filled = df.fillna({'salary': mean_salary})
print(df_filled)



In [2]:


'''Q2. Given a Pandas DataFrame df with columns 'A', 'B', and 'C', write a Python function to re-index the
DataFrame with a new index that starts from 1 and increments by 2 for each row.'''

import pandas as pd

def reindex(df):
    """Relabel the rows of ``df`` with the odd numbers 1, 3, 5, ...

    The frame is modified in place: its existing index is discarded and
    replaced by labels that start at 1 and grow by 2 per row. The same
    DataFrame object is returned.
    """
    # Discard the current index (in place) before installing the new labels.
    df.reset_index(drop=True, inplace=True)
    row_count = len(df.index)
    odd_labels = range(1, 2 * row_count + 1, 2)
    df.index = odd_labels
    return df
# Sample frame: columns A, B and C all hold the same four values.
data = {column: [2, 3, 4, 5] for column in ("A", "B", "C")}

df = pd.DataFrame(data)
# reindex() relabels df in place and hands the same frame back.
new_df = reindex(df)
print(new_df)




   A  B  C
1  2  2  2
3  3  3  3
5  4  4  4
7  5  5  5


In [4]:

'''Q3. You have a Pandas DataFrame df with a column named 'Values'. Write a Python function that
iterates over the DataFrame and calculates the sum of the first three values in the 'Values' column. The
function should print the sum to the console.
'''
def sum_first_three_values(df):
    """Print the sum of the first three entries of the 'Values' column.

    "First three" means the first three rows by position. The rows are
    counted with enumerate() rather than compared against the index label
    that iterrows() yields, so the result is correct for any index
    (non-zero-based, string labels, dates, ...). If the frame has fewer
    than three rows, the rows that exist are summed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Values' column.

    Returns
    -------
    None
        The sum is printed to stdout.
    """
    total = 0
    for position, (_, row) in enumerate(df.iterrows()):
        if position >= 3:
            # Nothing past the third row contributes; stop iterating early.
            break
        total = total + row['Values']
    print('The sum of first three values', total)

# Five sample values: 10, 20, 30, 40, 50.
sample_values = list(range(10, 51, 10))
df = pd.DataFrame({'Values': sample_values})
sum_first_three_values(df)




The sum of first three values 60


In [15]:
'''Q4. Given a Pandas DataFrame df with a column 'Text', write a Python function to create a new column
'Word_Count' that contains the number of words in each row of the 'Text' column.'''

import pandas as pd

def word_counts(df):
    """Add a 'Word_Count' column holding the number of words in each 'Text' row.

    Words are the whitespace-separated tokens of the text. Missing text
    (None / NaN) counts as 0 words instead of being stringified to "nan"
    and counted as one word. The column is named 'Word_Count', as the
    exercise statement requires.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Text' column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same frame, with the 'Word_Count' column added.
    """
    def _count_words(text):
        if pd.isna(text):
            return 0
        # str() keeps non-string cells (e.g. numbers) countable as one token.
        return len(str(text).split())

    df['Word_Count'] = df['Text'].apply(_count_words)
    return df

# Three sample sentences to count words in.
sample_texts = ['This is a sample text', 'Here is another sample text', 'And one more']
df = word_counts(pd.DataFrame({'Text': sample_texts}))
print(df)


                          Text  word_count
0        This is a sample text           5
1  Here is another sample text           5
2                 And one more           3


In [18]:
'''
Q5. How are DataFrame.size() and DataFrame.shape() different?

Ans-Both DataFrame.size and DataFrame.shape are attributes (properties) in Pandas, not methods, so they are accessed without parentheses (df.size, df.shape). Both describe the dimensions of a DataFrame, but they return different values and are used for different purposes.

The DataFrame.size attribute returns the total number of elements in the DataFrame, i.e., the number of rows multiplied by the number of columns. This can be useful when you need to know the total number of cells in the DataFrame.

On the other hand, the DataFrame.shape attribute returns a tuple of two integers representing the number of rows and columns in the DataFrame, respectively. This is typically used to get an overview of the DataFrame's structure.
'''

import pandas as pd

# 3 rows x 3 columns, so size is 9 and shape is (3, 3).
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
# size and shape are attributes, so there are no call parentheses.
print(df.size, df.shape, sep='\n')

9
(3, 3)


In [None]:
'''Q6. Which function of pandas do we use to read an excel file?
Ans-To read an Excel file in pandas, you can use the read_excel() function provided by pandas. Here is an example:

'''
import pandas as pd

# Workbook to load; "filename.xlsx" is a placeholder path.
excel_path = "filename.xlsx"
df = pd.read_excel(excel_path)


In [2]:

'''Q7. You have a Pandas DataFrame df that contains a column named 'Email' that contains email
addresses in the format 'username@domain.com'. Write a Python function that creates a new column
'Username' in df that contains only the username part of each email address.

The username is the part of the email address that appears before the '@' symbol. For example, if the
email address is 'john.doe@example.com', the 'Username' column should contain 'john.doe'. Your
function should extract the username from each email address and store it in the new 'Username'
column.

'''

def extract_username(df):
    """Add 'Username' (and 'Domain') columns derived from the 'Email' column.

    'Username' is the part of each address before the first '@'; 'Domain'
    is everything after it. The address is split at most once and the two
    pieces are picked by position, so the function does not depend on how
    many '@' characters the data contains:

    * an address without '@' becomes the username, with a missing domain;
    * an address with several '@' keeps the remainder in 'Domain'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an 'Email' column of strings. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same frame, with 'Username' and 'Domain' columns added.
    """
    # n=1 caps the split at two pieces; .str[i] then indexes into each list
    # and yields a missing value where the piece does not exist.
    pieces = df['Email'].str.split('@', n=1)
    df['Username'] = pieces.str[0]
    df['Domain'] = pieces.str[1]

    return df


import pandas as pd

# Sample addresses, all in the 'username@domain.com' format.
sample_emails = ['john.doe@example.com', 'jane.doe@example.com', 'james.smith@example.com']

df = extract_username(pd.DataFrame({'Email': sample_emails}))

print(df)


                     Email     Username       Domain
0     john.doe@example.com     john.doe  example.com
1     jane.doe@example.com     jane.doe  example.com
2  james.smith@example.com  james.smith  example.com


In [1]:

'''Q8. You have a Pandas DataFrame df with columns 'A', 'B', and 'C'. Write a Python function that selects
all rows where the value in column 'A' is greater than 5 and the value in column 'B' is less than 10. The
function should return a new DataFrame that contains only the selected rows.

For example, if df contains the following values:

   A   B   C

0  3   5   1

1  8   2   7

2  6   9   4

3  2   3   5

4  9   1   2

'''

import pandas as pd

# Sample values from the exercise statement.
data = dict(A=[3, 8, 6, 2, 9], B=[5, 2, 9, 3, 1], C=[1, 7, 4, 5, 2])

df = pd.DataFrame(data)


def func(df):
    """Return the rows of ``df`` where column 'A' is above 5 and column 'B' is below 10.

    The input frame is left untouched; the selected rows keep their
    original index labels.
    """
    a_above_five = df['A'] > 5
    b_below_ten = df['B'] < 10
    return df.loc[a_above_five & b_below_ten]


print(func(df))


   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [5]:
'''Q9. Given a Pandas DataFrame df with a column 'Values', write a Python function to calculate the mean,
median, and standard deviation of the values in the 'Values' column.'''
import pandas as pd
# Sample values: 10, 20, 30, 40, 50.
df = pd.DataFrame({'Values': list(range(10, 51, 10))})


def calculater(df):
    """Print the mean, median and standard deviation of the 'Values' column.

    Each statistic goes on its own line, prefixed with its label. Nothing
    is returned.
    """
    values = df['Values']
    # Each statistic is computed right before it is printed, in this order.
    statistics = (
        ("Mean", values.mean),
        ("Median", values.median),
        ("standard deviation", values.std),
    )
    for label, compute in statistics:
        print(label, compute())


calculater(df)

Mean 30.0
Median 30.0
standard deviation 15.811388300841896


In [None]:

'''Q10. Given a Pandas DataFrame df with a column 'Sales' and a column 'Date', write a Python function to
create a new column 'MovingAverage' that contains the moving average of the sales for the past 7 days
for each row in the DataFrame. The moving average should be calculated using a window of size 7 and
should include the current day'''

import pandas as pd

def add_moving_average(df):
    """Add a 'MovingAverage' column with the trailing 7-row mean of 'Sales'.

    Each row's average covers that row and up to six rows before it.
    min_periods=1 means the first six rows are averaged over the rows
    available so far instead of being left as NaN. The column is named
    'MovingAverage', as the exercise statement requires.

    The window counts rows, not calendar days.
    NOTE(review): this assumes one row per day in chronological order --
    confirm against the data being passed in.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Sales' column. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same frame, with the 'MovingAverage' column added.
    """
    df['MovingAverage'] = df['Sales'].rolling(window=7, min_periods=1).mean()
    return df


# Twelve consecutive days of sales, starting 2022-02-22.
sales = list(range(10, 121, 10))
dates = pd.date_range(start='2022-02-22', periods=len(sales), freq='D')
df = pd.DataFrame({'Sales': sales, 'Date': dates})

print(add_moving_average(df))

In [None]:

'''Q10. Given a Pandas DataFrame df with a column 'Sales' and a column 'Date', write a Python function to
create a new column 'MovingAverage' that contains the moving average of the sales for the past 7 days
for each row in the DataFrame. The moving average should be calculated using a window of size 7 and
should include the current day'''

import pandas as pd

def add_moving_average(df):
    """Add a 'MovingAverage' column with the trailing 7-row mean of 'Sales'.

    Each row's window is that row plus up to six rows before it; with
    min_periods=1 the first rows are averaged over what is available so
    far. The frame is modified in place and returned.
    """
    trailing_window = df['Sales'].rolling(window=7, min_periods=1)
    # Assign the raw array so the values are placed by position.
    df['MovingAverage'] = trailing_window.mean().to_numpy()
    return df


# Ten days of sales; the dates are plain 'YYYY-MM-DD' strings.
day_labels = [f'2022-01-{day:02d}' for day in range(1, 11)]
df = pd.DataFrame({'Sales': list(range(10, 101, 10)), 'Date': day_labels})

df = add_moving_average(df)
print(df)



In [None]:

'''Q10. Given a Pandas DataFrame df with a column 'Sales' and a column 'Date', write a Python function to
create a new column 'MovingAverage' that contains the moving average of the sales for the past 7 days
for each row in the DataFrame. The moving average should be calculated using a window of size 7 and
should include the current day'''

import pandas as pd

def add_moving_average(df):
    """Store the 7-row trailing mean of 'Sales' in a 'MovingAverage' column.

    The current row is part of its own window, and min_periods=1 lets the
    first rows use however many rows exist so far. ``df`` is updated in
    place and returned.

    Note: this cell redefines the function of the same name from the
    earlier Q10 cells.
    """
    sales = df['Sales']
    averages = sales.rolling(7, min_periods=1).mean()
    # Plain array, so assignment is positional.
    df['MovingAverage'] = averages.to_numpy()
    return df


# Same ten-day sample as the previous cell: sales 10..100, dates as strings.
sales = list(range(10, 101, 10))
date_strings = ['2022-01-%02d' % day for day in range(1, len(sales) + 1)]
df = pd.DataFrame({'Sales': sales, 'Date': date_strings})

df = add_moving_average(df)
print(df)



In [None]:
'''
Q11. You have a Pandas DataFrame df with a column 'Date'. Write a Python function that creates a new
column 'Weekday' in the DataFrame. The 'Weekday' column should contain the weekday name (e.g.
Monday, Tuesday) corresponding to each date in the 'Date' column.
For example, if df contains the following values:
Date
0 2023-01-01
1 2023-01-02
2 2023-01-03
3 2023-01-04
4 2023-01-05
Your function should create the following DataFrame:

Date Weekday
0 2023-01-01 Sunday
1 2023-01-02 Monday
2 2023-01-03 Tuesday
3 2023-01-04 Wednesday
4 2023-01-05 Thursday
The function should return the modified DataFrame.

Ans-'''

import pandas as pd

def add_weekday(df):
    """Add a 'Weekday' column with the full weekday name of each 'Date'.

    The ``dt`` accessor exposes the datetime properties of a pandas Series.
    It only exists on datetime-typed data, so the column is first passed
    through ``pd.to_datetime``; that lets the function accept both real
    datetimes and date strings such as '2023-01-01'. ``dt.day_name()``
    then returns the full weekday name (e.g. Sunday, Monday). Unlike
    ``dt.strftime('%A')``, it yields English names by default, whatever
    the process locale is.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Date' column. Modified in place; the 'Date'
        column itself is not changed.

    Returns
    -------
    pandas.DataFrame
        The same frame, with the 'Weekday' column added.
    """
    dates = pd.to_datetime(df['Date'])
    df['Weekday'] = dates.dt.day_name()
    return df

# The five dates from the exercise statement, parsed to datetimes up front.
date_strings = [f'2023-01-{day:02d}' for day in range(1, 6)]
df = pd.DataFrame({'Date': pd.to_datetime(date_strings)})
df = add_weekday(df)
print(df)




In [None]:
'''Q12. Given a Pandas DataFrame df with a column 'Date' that contains timestamps, write a Python
function to select all rows where the date is between '2023-01-01' and '2023-01-31'.
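Ans-We build a boolean mask that is True for every row whose date lies between '2023-01-01' and '2023-01-31' (both ends included) and pass it to .loc to select those rows; the Pandas between() function expresses the same inclusive range check. Here is the Python function to accomplish this: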

'''
import pandas as pd

def select_rows_between_dates(df, start_date='2023-01-01', end_date='2023-01-31'):
    """Return the rows of ``df`` whose 'Date' falls between two dates, inclusive.

    The comparison is done on calendar days: every timestamp is reduced to
    its date before the range check. A row stamped '2023-01-31 18:30' is
    therefore kept when ``end_date`` is '2023-01-31'; comparing the raw
    timestamp against the end date would cut the last day off at midnight.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Date' column of timestamps or date strings.
        It is not modified.
    start_date, end_date : str or datetime-like
        First and last day to keep. The defaults select January 2023.

    Returns
    -------
    pandas.DataFrame
        The selected rows, with their original index labels and with the
        'Date' values exactly as they were in ``df``.
    """
    dates = pd.to_datetime(df['Date'])
    if dates.dt.tz is not None:
        # Compare wall-clock dates; the bounds carry no timezone.
        dates = dates.dt.tz_localize(None)
    days = dates.dt.normalize()

    first_day = pd.Timestamp(start_date).normalize()
    last_day = pd.Timestamp(end_date).normalize()

    # between() includes both bounds; missing dates (NaT) are never selected.
    mask = days.between(first_day, last_day)
    return df.loc[mask]
# Three January dates and one February date that must be filtered out.
sample_dates = ['2023-01-01', '2023-01-05', '2023-01-15', '2023-02-01']
df = pd.DataFrame({'Date': sample_dates, 'Sales': list(range(10, 41, 10))})
selected_df = select_rows_between_dates(df)
print(selected_df)



In [None]:
'''Q13. To use the basic functions of pandas, what is the first and foremost necessary library that needs to
be imported?

Ans-The first and foremost necessary library that needs to be imported to use the basic functions of pandas is pandas itself.

To import pandas, you can use the following code:

import pandas as pd

The pd alias is a commonly used convention for pandas, as it makes it easier to refer to the pandas functions later in your code. Once you have imported pandas, you can use its functions and classes to manipulate and analyze data in Python.
'''