In [None]:
## Answer 1)
##read_csv(): Reads a CSV (Comma Separated Values) file into a pandas DataFrame.
##head(): Returns the first n rows of a DataFrame. By default, it returns the first 5 rows.
##tail(): Returns the last n rows of a DataFrame. By default, it returns the last 5 rows.
##describe(): Generates descriptive statistics of a DataFrame, including count, mean, standard deviation, minimum, quartiles, and maximum.
##groupby(): Groups a DataFrame by one or more columns, allowing aggregation operations to be performed on the groups.

import pandas as pd

# Reading a CSV file into a DataFrame.
# 'data.csv' is read when it is present in the working directory; otherwise a
# small in-memory sample is parsed instead, so the cell does not stop with
# FileNotFoundError on a machine that lacks the file.
import io
import os

_sample_csv = io.StringIO(
    "Name,Age,City\n"
    "John,25,New York\n"
    "Emily,30,Paris\n"
    "Mike,35,London\n"
)
df = pd.read_csv('data.csv' if os.path.exists('data.csv') else _sample_csv)
print(df.head())

# Creating a DataFrame from a dict of equal-length columns
data = {'Name': ['John', 'Emily', 'Mike', 'Jessica', 'David'],
        'Age': [25, 30, 35, 28, 32],
        'City': ['New York', 'Paris', 'London', 'Tokyo', 'Sydney']}

df = pd.DataFrame(data)

# Printing the first 3 rows
print(df.head(3))

# Printing the last 2 rows
print(df.tail(2))

# Generating descriptive statistics (only the numeric 'Age' column is summarised)
print(df.describe())

# Grouping by 'City' column and calculating average salary.
# The same records are reused with one extra 'Salary' column rather than
# retyping every column.
data = {**data, 'Salary': [50000, 60000, 70000, 55000, 65000]}

df = pd.DataFrame(data)

grouped_df = df.groupby('City')['Salary'].mean()
print(grouped_df)

In [2]:
## Answer 2)
import pandas as pd

def reindex_with_increment(df):
    """Return a re-labelled copy of ``df`` whose index runs 1, 3, 5, ...

    The existing index of ``df`` is dropped; the rows keep their order and
    the passed-in frame itself is not modified.
    """
    row_count = len(df)
    relabelled = df.reset_index(drop=True)
    # Odd labels: start at 1, step by 2, one label per row.
    relabelled.index = pd.Index(range(1, row_count * 2, 2))
    return relabelled


# Example usage: a three-row frame is relabelled with the index 1, 3, 5
df = pd.DataFrame(
    {
        'A': [10, 20, 30],
        'B': [40, 50, 60],
        'C': [70, 80, 90],
    }
)
new_df = reindex_with_increment(df)
print(new_df)

    A   B   C
1  10  40  70
3  20  50  80
5  30  60  90


In [3]:
## Answer 3)
import pandas as pd

def calculate_sum_first_three(df):
    """Print and return the sum of the first three entries of ``df['Values']``.

    The first three rows are taken by position (``iloc``), whatever the
    index labels are. Frames with fewer than three rows are summed over the
    rows that exist; an empty column sums to 0. Missing values are skipped,
    following the pandas ``Series.sum`` default.

    Returns:
        The computed sum, so callers can use the value and not only read it
        from the printed line.
    """
    sum_first_three = df['Values'].iloc[:3].sum()
    print("Sum of the first three values:", sum_first_three)
    return sum_first_three


# Example usage: the first three of the five values are 10, 20 and 30
df = pd.DataFrame(
    {'Values': [10, 20, 30, 40, 50]}
)
calculate_sum_first_three(df)


Sum of the first three values: 60


In [4]:
## Answer 4)
import pandas as pd

def add_word_count_column(df):
    """Add a 'Word_Count' column holding the number of words in 'Text'.

    Words are whitespace-separated tokens. A missing entry (None/NaN) counts
    as 0 words; converting it with ``str`` first would turn it into the
    literal text 'None' or 'nan' and report one word.

    The column is added to ``df`` in place and the same frame is returned.
    """
    def count_words(text):
        if pd.isna(text):
            return 0
        return len(str(text).split())

    df['Word_Count'] = df['Text'].apply(count_words)
    return df


# Example usage: three short sentences, one per row
df = pd.DataFrame(
    {'Text': ['Hello, how are you?', 'I am doing well.', 'Python is great.']}
)
df = add_word_count_column(df)
print(df)


                  Text  Word_Count
0  Hello, how are you?           4
1     I am doing well.           4
2     Python is great.           3


In [None]:
## Answer 5)
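The `DataFrame.size` and `DataFrame.shape` attributes in pandas provide different information about the size and shape of a DataFrame. Both are properties, not methods, so they are accessed without parentheses (`df.size`, `df.shape`); writing `df.size()` or `df.shape()` raises a `TypeError`.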

1. `DataFrame.size`:
   - The `DataFrame.size` attribute returns the total number of elements in the DataFrame.
   - Its value equals the number of rows (`DataFrame.shape[0]`) multiplied by the number of columns (`DataFrame.shape[1]`).
   - The returned value represents the total number of elements in the DataFrame, including NaN or missing values.
   - The size is a scalar integer, representing the total count of elements.

2. `DataFrame.shape`:
   - The `DataFrame.shape` attribute returns a tuple containing the dimensions of the DataFrame.
   - It provides the number of rows as the first element (`DataFrame.shape[0]`) and the number of columns as the second element (`DataFrame.shape[1]`).
   - The shape is represented as `(number of rows, number of columns)`.
   - The shape is a tuple, providing information about the structure of the DataFrame.

In summary, `DataFrame.size` gives the total number of elements in the DataFrame as a scalar value, while `DataFrame.shape` gives a tuple representing the dimensions of the DataFrame as `(number of rows, number of columns)`. For example, a DataFrame with 3 rows and 4 columns has `shape == (3, 4)` and `size == 12`.

## Answer 6)
In pandas, the function used to read an Excel file is read_excel(). The read_excel() function allows you to read the data from one or multiple sheets of an Excel file into a pandas DataFrame.

In [6]:
## Answer 7)
import pandas as pd

def extract_username(df):
    """Add a 'Username' column with the part of 'Email' before the first '@'.

    The vectorised ``.str`` accessor is used, so a missing email (None/NaN)
    yields a missing username; calling ``split`` on each value directly
    fails on such entries. An address without '@' is returned whole.

    The column is added to ``df`` in place and the same frame is returned.
    """
    # n=1: only the first '@' matters, so split at most once.
    df['Username'] = df['Email'].str.split('@', n=1).str[0]
    return df


# Example usage: three addresses spread over two domains
df = pd.DataFrame(
    {'Email': ['john.doe@example.com', 'jane.smith@example.com', 'alice@domain.com']}
)
df = extract_username(df)
print(df)


                    Email    Username
0    john.doe@example.com    john.doe
1  jane.smith@example.com  jane.smith
2        alice@domain.com       alice


In [7]:
## Answer 8)
import pandas as pd

def select_rows(df):
    """Return the rows of ``df`` where column 'A' exceeds 5 and 'B' is below 10.

    Both conditions must hold for a row to be kept; the original index
    labels of the kept rows are preserved.
    """
    a_above_five = df['A'].gt(5)
    b_below_ten = df['B'].lt(10)
    return df[a_above_five & b_below_ten]


# Example usage: rows 1, 2 and 4 are the ones with A > 5 and B < 10
df = pd.DataFrame(
    {
        'A': [3, 8, 6, 2, 9],
        'B': [5, 2, 9, 3, 1],
        'C': [1, 7, 4, 5, 2],
    }
)
selected_df = select_rows(df)
print(selected_df)


   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [8]:
## Answer 9)
import pandas as pd

def calculate_statistics(df):
    """Return ``(mean, median, std)`` of the 'Values' column of ``df``.

    ``std`` is whatever ``Series.std`` yields with its default arguments.
    """
    series = df['Values']
    return series.mean(), series.median(), series.std()


# Example usage: summary statistics of five evenly spaced values
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
mean, median, std = calculate_statistics(df)

# One labelled line per statistic, in the order they were returned.
for label, value in (("Mean:", mean), ("Median:", median), ("Standard Deviation:", std)):
    print(label, value)

Mean: 30.0
Median: 30.0
Standard Deviation: 15.811388300841896


In [9]:
## Answer 10)
import pandas as pd

def calculate_moving_average(df, window=7):
    """Add a 'MovingAverage' column: the rolling mean of 'Sales'.

    Args:
        df: Frame with a numeric 'Sales' column. Rows are averaged in their
            existing order; the frame is not sorted here.
        window: Number of consecutive rows in each average. Defaults to 7.

    ``min_periods=1`` means the first ``window - 1`` rows are averaged over
    the rows available so far and are not left as NaN.

    The column is added to ``df`` in place and the same frame is returned.
    """
    df['MovingAverage'] = df['Sales'].rolling(window=window, min_periods=1).mean()
    return df


# Example usage: nine consecutive days of sales
df = pd.DataFrame(
    {
        'Sales': [10, 15, 20, 25, 30, 35, 40, 45, 50],
        'Date': [
            '2022-01-01', '2022-01-02', '2022-01-03',
            '2022-01-04', '2022-01-05', '2022-01-06',
            '2022-01-07', '2022-01-08', '2022-01-09',
        ],
    }
)
df = calculate_moving_average(df)
print(df)


   Sales        Date  MovingAverage
0     10  2022-01-01           10.0
1     15  2022-01-02           12.5
2     20  2022-01-03           15.0
3     25  2022-01-04           17.5
4     30  2022-01-05           20.0
5     35  2022-01-06           22.5
6     40  2022-01-07           25.0
7     45  2022-01-08           30.0
8     50  2022-01-09           35.0


In [10]:
## Answer 11)
import pandas as pd

def add_weekday_column(df):
    """Add a 'Weekday' column with the English day name of each 'Date'.

    'Date' is passed through ``pd.to_datetime`` for this computation, so the
    column may hold datetime values or parseable date strings. The 'Date'
    column itself is left as it was given.

    The column is added to ``df`` in place and the same frame is returned.
    """
    # The .dt accessor exists only on datetime-like data; coerce first so a
    # column of date strings does not raise AttributeError.
    df['Weekday'] = pd.to_datetime(df['Date']).dt.day_name()
    return df


# Example usage: the first five days of January 2023
df = pd.DataFrame(
    {'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']}
)
# Parse the date strings into datetime values before asking for day names.
df['Date'] = pd.to_datetime(df['Date'])
df = add_weekday_column(df)
print(df)

        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday


In [11]:
## Answer 12)
import pandas as pd

def select_rows_by_date_range(df, start='2023-01-01', end='2023-01-31'):
    """Return the rows of ``df`` whose 'Date' lies between ``start`` and ``end``.

    Args:
        df: Frame with a 'Date' column.
        start: Lower bound, inclusive. Defaults to '2023-01-01'.
        end: Upper bound, inclusive. Defaults to '2023-01-31'.

    Both bounds are compared against the 'Date' column with ``>=`` and
    ``<=``, so they must be values that column can be compared with.
    The original index labels of the kept rows are preserved.
    """
    mask = (df['Date'] >= start) & (df['Date'] <= end)
    return df[mask]


# Example usage: two January dates and two February dates
df = pd.DataFrame(
    {'Date': ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15']}
)
# Parse the date strings into datetime values before filtering.
df['Date'] = pd.to_datetime(df['Date'])
selected_df = select_rows_by_date_range(df)
print(selected_df)


        Date
0 2023-01-01
1 2023-01-15


## Answer 13)
To use the basic functions of pandas, the one library that must be imported is pandas itself, conventionally as `import pandas as pd`. The pandas library provides data structures and functions for efficient data manipulation and analysis.