In [1]:
import pandas as pd

# Creating a sample DataFrame
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# 1. head() - Displays the first n rows (default is 5)
print(df.head())

# 2. describe() - Generates summary statistics
print(df.describe())

# 3. info() - Provides information about the DataFrame
print(df.info())

# 4. drop() - Drops a column
df_dropped = df.drop(columns=['B'])
print(df_dropped)

# 5. fillna() - Fills missing values
df_filled = df.fillna(0)
print(df_filled)


   A  B
0  1  4
1  2  5
2  3  6
         A    B
count  3.0  3.0
mean   2.0  5.0
std    1.0  1.0
min    1.0  4.0
25%    1.5  4.5
50%    2.0  5.0
75%    2.5  5.5
max    3.0  6.0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       3 non-null      int64
 1   B       3 non-null      int64
dtypes: int64(2)
memory usage: 120.0 bytes
None
   A
0  1
1  2
2  3
   A  B
0  1  4
1  2  5
2  3  6


In [2]:
def reindex_dataframe(df):
    df.index = range(1, 2 * len(df), 2)
    return df

# Example Usage
df = pd.DataFrame({'A': [10, 20, 30], 'B': [40, 50, 60], 'C': [70, 80, 90]})
df = reindex_dataframe(df)
print(df)


    A   B   C
1  10  40  70
3  20  50  80
5  30  60  90


In [3]:
def sum_first_three(df):
    print(df['Values'].head(3).sum())

# Example Usage
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
sum_first_three(df)


60


In [4]:
def add_word_count(df):
    df['Word_Count'] = df['Text'].apply(lambda x: len(str(x).split()))
    return df

# Example Usage
df = pd.DataFrame({'Text': ["Hello World", "This is Pandas", "Data Science"]})
df = add_word_count(df)
print(df)


             Text  Word_Count
0     Hello World           2
1  This is Pandas           3
2    Data Science           2


In [5]:
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
print(df.size)  # Output: 4
print(df.shape) # Output: (2, 2)


4
(2, 2)


In [6]:
df = pd.read_excel('file.xlsx')


<class 'FileNotFoundError'>: [Errno 44] No such file or directory: 'file.xlsx'

In [7]:
def extract_username(df):
    df['Username'] = df['Email'].apply(lambda x: x.split('@')[0])
    return df

# Example Usage
df = pd.DataFrame({'Email': ['john.doe@example.com', 'alice.smith@gmail.com']})
df = extract_username(df)
print(df)


                   Email     Username
0   john.doe@example.com     john.doe
1  alice.smith@gmail.com  alice.smith


In [8]:
def filter_rows(df):
    return df[(df['A'] > 5) & (df['B'] < 10)]

# Example Usage
df = pd.DataFrame({'A': [3, 8, 6, 2, 9], 'B': [5, 2, 9, 3, 1], 'C': [1, 7, 4, 5, 2]})
filtered_df = filter_rows(df)
print(filtered_df)


   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [9]:
def calculate_stats(df):
    return {
        'Mean': df['Values'].mean(),
        'Median': df['Values'].median(),
        'Std Dev': df['Values'].std()
    }

# Example Usage
df = pd.DataFrame({'Values': [10, 20, 30, 40, 50]})
stats = calculate_stats(df)
print(stats)


{'Mean': np.float64(30.0), 'Median': np.float64(30.0), 'Std Dev': np.float64(15.811388300841896)}


In [10]:
def add_moving_average(df):
    df['MovingAverage'] = df['Sales'].rolling(window=7, min_periods=1).mean()
    return df

# Example Usage
df = pd.DataFrame({'Sales': [100, 200, 300, 400, 500, 600, 700, 800, 900]})
df = add_moving_average(df)
print(df)


   Sales  MovingAverage
0    100          100.0
1    200          150.0
2    300          200.0
3    400          250.0
4    500          300.0
5    600          350.0
6    700          400.0
7    800          500.0
8    900          600.0


In [11]:
def add_weekday(df):
    df['Date'] = pd.to_datetime(df['Date'])
    df['Weekday'] = df['Date'].dt.day_name()
    return df

# Example Usage
df = pd.DataFrame({'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']})
df = add_weekday(df)
print(df)


        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday
