# Week 2: Basics of Data Wrangling Features

# 1: Creating a DataFrame

In [1]:
import pandas as pd
# Column-oriented input: every key becomes a column and its list supplies the
# values for that column, one entry per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[24, 27, 22],
    City=['New York', 'Los Angeles', 'Chicago'],
)
df = pd.DataFrame(data)
print(df)

      Name  Age         City
0    Alice   24     New York
1      Bob   27  Los Angeles
2  Charlie   22      Chicago


# 2: Reading CSV Files

In [4]:
# Load 'sample1.csv' into a DataFrame. The path is relative, so the file is
# expected to sit in the notebook's current working directory; read_csv fails
# if it is missing.
csv_path = 'sample1.csv'
df = pd.read_csv(csv_path)
# Show only the first rows instead of printing the whole frame.
print(df.head())

   Product ID    Product Name         Category   Brand   Price  \
0        1001  Smart Watch A1      Electronics  BrandX  199.99   
1        1002  Wireless Mouse      Electronics  BrandY   25.99   
2        1003  Leather Jacket          Apparel  BrandZ  149.99   
3        1004    Coffee Maker  Home Appliances  BrandW   89.99   

   Stock Quantity  Rating  
0              50     4.5  
1             150     4.0  
2              30     4.8  
3             120     4.3  


# 3: Data Cleaning

In [5]:
# Two columns, each containing one missing entry (None).
df = pd.DataFrame({'A': [1, 2, None, 4], 'B': [None, 2, 3, 4]})
# Keep only the rows in which every column holds a value.
complete_rows = df.notna().all(axis=1)
df_cleaned = df[complete_rows]
print(df_cleaned)

     A    B
1  2.0  2.0
3  4.0  4.0


# 4: Filtering Data

In [6]:
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]})
# Boolean mask that is True on the rows where column 'A' is greater than 2;
# indexing the frame with it keeps just those rows (original labels retained).
a_above_two = df['A'].gt(2)
filtered_df = df[a_above_two]
print(filtered_df)

   A  B
2  3  7
3  4  8


# 5: Grouping Data

In [7]:
df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar'],
    'B': [1, 2, 3, 4],
})
# Bucket the rows by their 'A' label, then total 'B' inside each bucket.
# The labels become the index of the result.
by_label = df.groupby('A')
grouped = by_label.sum()
print(grouped)

     B
A     
bar  6
foo  4


# 6: Merging DataFrames

In [8]:
# Two frames that share the key column 'A' and each carry one value column.
df1 = pd.DataFrame({'A': ['foo', 'bar'], 'B': [1, 2]})
df2 = pd.DataFrame({'A': ['foo', 'bar'], 'C': [3, 4]})
# Join on 'A' so matching keys end up on one row holding both B and C.
merged_df = df1.merge(df2, on='A')
print(merged_df)

     A  B  C
0  foo  1  3
1  bar  2  4


# 7: Pivot Tables

In [9]:
df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar'],
    'B': [1, 2, 3, 4],
    'C': [5, 6, 7, 8],
})
# One row per 'A' label and one column per 'B' value; each cell is the sum of
# 'C' for that pair. Pairs that never occur in the data show up as NaN.
pivot_table = pd.pivot_table(df, values='C', index='A', columns='B', aggfunc='sum')
print(pivot_table)

B      1    2    3    4
A                      
bar  NaN  6.0  NaN  8.0
foo  5.0  NaN  7.0  NaN


# 8: DataFrame Transformation

In [10]:
def _square(values):
    """Return ``values`` raised to the power of two, element by element."""
    return values ** 2


df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]})
# transform hands the data to the callable and returns a frame with the same
# shape and labels as the input.
transformed_df = df.transform(_square)

print(transformed_df)

    A   B
0   1  25
1   4  36
2   9  49
3  16  64


# 9: Handling Duplicates

In [11]:
df = pd.DataFrame({'A': [1, 1, 2, 3], 'B': [4, 4, 5, 6]})
# Flag each row that repeats an earlier row in full, then keep the unflagged
# rows; the first occurrence of every row survives with its original label.
is_repeat = df.duplicated()
df_no_duplicates = df[~is_repeat]
print(df_no_duplicates)

   A  B
0  1  4
2  2  5
3  3  6


# 10: Saving DataFrames to CSV

In [12]:
df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6],
})
# index=False leaves the row labels out of the file, so only the A and B
# columns (plus the header line) are written.
df.to_csv(path_or_buf='output.csv', index=False)
print("DataFrame saved to output.csv")

DataFrame saved to output.csv
