In [None]:
import pandas as pd
import numpy as np

In [None]:
# Sample employee records, one list per column; the keyword order below
# becomes the column order of the resulting DataFrame.
data = dict(
    Name=['Pratik', 'Rahul', 'Sneha', 'Amit', 'Priya'],
    Age=[25, 30, 35, 28, 32],
    City=['Satara', 'Pune', 'Mumbai', 'Nashik', 'Nagpur'],
    Salary=[50000, 60000, 75000, 55000, 70000],
)
df = pd.DataFrame(data)
print(df)

In [None]:
# Persist the frame to a CSV in the working directory; index=False keeps the
# row labels out of the file so no extra unnamed column appears on reload.
df.to_csv(path_or_buf='employee_data.csv', index=False)

# Load the same file back into a separate frame and show it.
df_read = pd.read_csv(filepath_or_buffer='employee_data.csv')
print(df_read)

In [None]:
# Quick structural overview of the employee frame.
print(df.head(3))  # first three rows
print(df.tail(2))  # last two rows

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() would append a stray "None" line.
df.info()

print(df.describe())        # summary statistics
print(df.shape)             # (rows, columns)
print(df.columns.tolist())  # column labels as a plain list
print(df.dtypes)            # dtype of each column

In [None]:
# Column selection: a single label yields a Series, a list of labels a DataFrame.
print(df.loc[:, 'Name'])
print(df.loc[:, ['Name', 'Age']])

# Label-based rows: the row labelled 2, then labels 1 through 3 (end inclusive).
print(df.loc[2, :])
print(df.loc[1:3, :])

# Position-based block: first two rows, first three columns (end exclusive).
print(df.iloc[:2, :3])

In [None]:
# Row filters expressed as boolean masks passed to .loc.
print(df.loc[df['Age'].gt(30)])          # strictly older than 30
print(df.loc[df['Salary'].ge(60000)])    # salary of at least 60000
# Both conditions must hold; the two masks are combined element-wise with &.
print(df.loc[df['Age'].gt(25) & df['Salary'].gt(60000)])
# Membership test against a list of cities.
print(df.loc[df['City'].isin(['Satara', 'Pune'])])

In [None]:
# Add a bonus column worth 10% of each salary.
df['Bonus'] = df['Salary'].mul(0.1)
print(df)

# Age everyone by one year. This rewrites df['Age'] from its own current
# values, so every execution of this cell adds another year.
df['Age'] = df['Age'].add(1)
print(df)

# Work on an independent copy so that dropping 'Bonus' leaves df itself intact.
df_copy = df.copy().drop(columns='Bonus')
print(df_copy)

In [None]:
# Small frame with gaps: column A has one NaN, B has two, C is complete.
data_missing = dict(
    A=[1, 2, np.nan, 4],
    B=[5, np.nan, np.nan, 8],
    C=[9, 10, 11, 12],
)
df_missing = pd.DataFrame(data_missing)
print(df_missing)

# Cell-by-cell missing mask, followed by the number of missing cells per column.
print(df_missing.isna())
print(df_missing.isna().sum())

# Strategy 1: substitute every missing cell with 0.
df_filled = df_missing.fillna(value=0)
print(df_filled)

# Strategy 2: discard every row that has at least one missing cell.
df_dropped = df_missing.dropna(axis=0, how='any')
print(df_dropped)

In [None]:
# Three orderings of the same frame; sort_values returns a new frame each time,
# so df itself keeps its original row order.
print(df.sort_values(by='Age'))                      # youngest first
print(df.sort_values(by='Salary', ascending=False))  # highest salary first
print(df.sort_values(by=['Age', 'Salary']))          # age, then salary within equal ages

In [None]:
# Six employees spread evenly over three departments.
data_group = dict(
    Department=['Sales', 'Sales', 'IT', 'IT', 'HR', 'HR'],
    Employee=['Pratik', 'Rahul', 'Sneha', 'Amit', 'Priya', 'Neha'],
    Salary=[50000, 55000, 70000, 75000, 45000, 48000],
    Experience=[2, 3, 5, 7, 1, 2],
)
df_group = pd.DataFrame(data_group)
print(df_group)

# Average salary within each department.
print(df_group.groupby('Department')['Salary'].mean())

# Several statistics at once: three aggregates for Salary and the maximum
# Experience, all computed per department.
print(
    df_group.groupby('Department').agg(
        {
            'Salary': ['mean', 'sum', 'count'],
            'Experience': 'max',
        }
    )
)

In [None]:
# Two small tables sharing the key column 'ID'; only IDs 1 and 2 occur in both.
df1 = pd.DataFrame(dict(ID=[1, 2, 3, 4], Name=['Pratik', 'Rahul', 'Sneha', 'Amit']))
df2 = pd.DataFrame(dict(ID=[1, 2, 5, 6], Score=[85, 90, 78, 88]))

print(df1, df2, sep='\n')

# inner keeps keys found in both tables, left keeps every df1 row, and outer
# keeps the union of keys from both sides.
print(df1.merge(df2, on='ID', how='inner'))
print(df1.merge(df2, on='ID', how='left'))
print(df1.merge(df2, on='ID', how='outer'))

In [None]:
# Label each salary: strictly above 60000 is 'High', everything else is 'Low'.
# The comparison yields a boolean Series that is then translated to labels.
df['Salary_Category'] = (df['Salary'] > 60000).map({True: 'High', False: 'Low'})
print(df)

def categorize_age(age):
    """Return the age band for ``age``.

    Ages below 30 map to 'Young', ages below 35 map to 'Middle', and
    anything that satisfies neither comparison maps to 'Senior'.
    """
    if age < 30:
        return 'Young'
    return 'Middle' if age < 35 else 'Senior'

# Run categorize_age on every Age value and store the resulting band label.
df['Age_Category'] = df['Age'].map(categorize_age)
print(df)

In [None]:
# Derive an upper-cased copy of each name and its character count through the
# vectorised .str accessor.
df['Name_Upper'] = df['Name'].str.upper()
df['Name_Length'] = df['Name'].str.len()
print(df.loc[:, ['Name', 'Name_Upper', 'Name_Length']])

# Keep only the rows whose name contains the letter "a" in either case.
print(df.loc[df['Name'].str.contains(pat='a', case=False)])

In [None]:
# Seed NumPy's global generator so the same 4x4 grid is drawn on every run.
np.random.seed(42)
random_data = np.random.randint(low=10, high=100, size=(4, 4))  # ints in [10, 100)

# Wrap the array with readable row and column labels.
df_random = pd.DataFrame(
    data=random_data,
    index=[f'Row_{number}' for number in range(1, 5)],
    columns=[f'Column_{letter}' for letter in 'ABCD'],
)

print(df_random, df_random.shape, sep='\n')

In [None]:
# Replace the three largest entries of one column with a sentinel value,
# working on a copy so that df_random itself stays untouched.
print(df_random)

column_name = 'Column_A'
print(df_random[column_name])

column_values = df_random[column_name].values
# argsort orders ascending, so the last three positions index the three
# largest values (fewer if the column has fewer than three rows).
top_3_indices = np.argsort(column_values)[-3:]
top_3_values = column_values[top_3_indices]

# .tolist() converts the NumPy scalars to plain Python numbers so the printed
# list shows bare values instead of np.int64(...) reprs on NumPy >= 2.
print(sorted(top_3_values.tolist(), reverse=True))

replacement_value = -1
df_replaced = df_random.copy()

# A single positional assignment covers all selected rows at once; the
# row positions and the column position are both integer-based for .iloc.
df_replaced.iloc[top_3_indices, df_replaced.columns.get_loc(column_name)] = replacement_value

print(df_replaced)

In [None]:
# Round trip: DataFrame -> ndarray -> element-wise doubling -> DataFrame.
print(df_random, type(df_random), sep='\n')

# The bare array keeps the values but not the row/column labels.
numpy_array = df_random.to_numpy()
print(numpy_array, type(numpy_array), numpy_array.shape, sep='\n')

multiplied_array = np.multiply(numpy_array, 2)
print(multiplied_array)

# Reattach the original labels when wrapping the doubled values again.
df_result = pd.DataFrame(
    data=multiplied_array,
    index=df_random.index,
    columns=df_random.columns,
)
print(df_result, type(df_result), sep='\n')

# Sanity check: the rebuilt frame equals doubling the source values directly.
print(np.array_equal(df_result.to_numpy(), df_random.to_numpy() * 2))