In [2]:
# Creating two DataFrames
import pandas as pd
df1 = pd.DataFrame({
    'Name': ['Alice', 'Bob'],
    'Age': [25, 30]
})

df2 = pd.DataFrame({
    'Name': ['Charlie', 'David'],
    'Age': [35, 40]
})

# Correct concatenation along rows (axis=0)
combined_df = pd.concat([df1, df2], axis=0).reset_index(drop=True)
print(combined_df)


      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40


In [3]:
# Creating two DataFrames
df1 = pd.DataFrame({
    'ID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie']
})

df2 = pd.DataFrame({
    'ID': [1, 2, 4],
    'Salary': [50000, 60000, 70000]
})

# Correct merging with proper join type (how='left')
merged_df = pd.merge(df1, df2, on='ID', how='left')
print(merged_df)


   ID     Name   Salary
0   1    Alice  50000.0
1   2      Bob  60000.0
2   3  Charlie      NaN


In [4]:
# Creating a DataFrame
df = pd.DataFrame({
    'Date': ['2023-01-01', '2023-01-02', '2023-01-03'],
    'City': ['New York', 'Chicago', 'New York'],
    'Sales': [100, 150, 200]
})

# Correct pivot table syntax (use 'columns', not 'cols')
pivot_df = pd.pivot_table(df, index='Date', values='Sales', columns='City', aggfunc='sum')
print(pivot_df)


City        Chicago  New York
Date                         
2023-01-01      NaN     100.0
2023-01-02    150.0       NaN
2023-01-03      NaN     200.0


In [5]:
# Creating a DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Salary': [50000, 60000, 70000]
})

# Correct application of a function to a column
df['New Salary'] = df['Salary'].apply(lambda x: x * 1.1 if x > 60000 else x * 1.05)
print(df)


      Name  Salary  New Salary
0    Alice   50000     52500.0
1      Bob   60000     63000.0
2  Charlie   70000     77000.0


In [6]:
# Creating a DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Alice', 'Bob'],
    'Sales': [200, 150, 300, 400],
    'City': ['New York', 'Chicago', 'New York', 'Chicago']
})

# Correct groupby with multiple aggregation functions (use 'mean' instead of 'average')
grouped_df = df.groupby('City')['Sales'].agg([sum, 'mean'])
print(grouped_df)


          sum   mean
City                
Chicago   550  275.0
New York  500  250.0


  grouped_df = df.groupby('City')['Sales'].agg([sum, 'mean'])


In [7]:
# Creating a DataFrame with correct date type
df = pd.DataFrame({
    'Date': ['2023-01-01', '2023-02-01', '2023-03-01'],
    'Sales': [200, 300, 400]
})

# Correct datetime conversion
df['Date'] = pd.to_datetime(df['Date'])

# Setting Date column as index
df.set_index('Date', inplace=True)
print(df)


            Sales
Date             
2023-01-01    200
2023-02-01    300
2023-03-01    400


In [8]:
# Creating a DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob'],
    'Math': [85, 90],
    'English': [95, 80]
})

# Correct use of melt (correct column name in 'id_vars')
melted_df = pd.melt(df, id_vars='Name', value_vars=['Math', 'English'])
print(melted_df)


    Name variable  value
0  Alice     Math     85
1    Bob     Math     90
2  Alice  English     95
3    Bob  English     80


In [9]:
# Creating a DataFrame
df = pd.DataFrame({
    'Date': pd.date_range(start='2023-01-01', periods=5),
    'Sales': [100, 200, 150, 300, 250]
})

# Correct rolling function (use 'rolling' instead of 'roll')
df['Rolling Mean'] = df['Sales'].rolling(3).mean()
print(df)


        Date  Sales  Rolling Mean
0 2023-01-01    100           NaN
1 2023-01-02    200           NaN
2 2023-01-03    150    150.000000
3 2023-01-04    300    216.666667
4 2023-01-05    250    233.333333


In [10]:
import numpy as np

# Creating a DataFrame
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Score': [85, 90, 70]
})

# Correct conditional assignment (use correct column name and syntax)
df['Result'] = np.where(df['Score'] >= 80, 'Pass', 'Fail')
print(df)


      Name  Score Result
0    Alice     85   Pass
1      Bob     90   Pass
2  Charlie     70   Fail
