## 7. Data Transformation

- **Renaming**: `rename()`
- **Mapping and Applying Functions**: `apply()`, `map()`, `applymap()`
- **Sorting**: `sort_values()`, `sort_index()`
- **Reshaping**: `pivot()`, `pivot_table()`, `melt()`

In [1]:
import pandas as pd

class CustomObject:
    def __init__(self, name, value):
        self.name = name
        self.value = value

    def __repr__(self):
        return f"CustomObject(name={self.name}, value={self.value})"


# Create instances of the custom class
obj1 = CustomObject('A', 10)
obj2 = CustomObject('B', 20)
obj3 = CustomObject('C', 30)

# Create a Pandas Series with these custom objects
custom_series = pd.Series([obj1, obj2, obj3], dtype='object')

print("Pandas Series with Custom Objects:")
print(custom_series)


Pandas Series with Custom Objects:
0    CustomObject(name=A, value=10)
1    CustomObject(name=B, value=20)
2    CustomObject(name=C, value=30)
dtype: object


In [2]:
# Accessing elements
first_obj = custom_series[0]
print("\nFirst Object:")
print(first_obj)

# Applying a function to each element
def increase_value(custom_obj):
    custom_obj.value += 5
    return custom_obj

updated_series = custom_series.apply(increase_value)

print("\nUpdated Series with Increased Values:")
print(updated_series)


First Object:
CustomObject(name=A, value=10)

Updated Series with Increased Values:
0    CustomObject(name=A, value=15)
1    CustomObject(name=B, value=25)
2    CustomObject(name=C, value=35)
dtype: object


In [3]:
# Create a DataFrame with a column of custom objects
data = {'custom_col': [obj1, obj2, obj3]}
df = pd.DataFrame(data)

print("\nDataFrame with Custom Objects:")
print(df)

# Apply a function to a column of custom objects
df['custom_col'] = df['custom_col'].apply(increase_value)

print("\nDataFrame with Updated Custom Objects:")
print(df)



DataFrame with Custom Objects:
                       custom_col
0  CustomObject(name=A, value=15)
1  CustomObject(name=B, value=25)
2  CustomObject(name=C, value=35)

DataFrame with Updated Custom Objects:
                       custom_col
0  CustomObject(name=A, value=20)
1  CustomObject(name=B, value=30)
2  CustomObject(name=C, value=40)


In [3]:
import pandas as pd

# Example DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David'],
        'Age': [25, 30, 35, 40],
        'City': ['New York', 'Los Angeles', 'Chicago', 'Houston']}
df = pd.DataFrame(data)

# Renaming columns
renamed_df = df.rename(columns={'Name': 'Full Name', 'City': 'Location'}, index={0: 'A', 1: 'B'})
print(renamed_df)

# Renaming rows
# renamed_df = df.rename(index={0: 'A', 1: 'B'})
# print(renamed_df)

  Full Name  Age     Location
A     Alice   25     New York
B       Bob   30  Los Angeles
2   Charlie   35      Chicago
3     David   40      Houston


In [11]:
# Applying a function to a column
df['Age in 10 Years'] = df['Age'].apply(lambda x: x + 10)
print(df)

# Applying a mapping to a column
df['City Code'] = df['City'].map({'New York': 'NY', 'Los Angeles': 'LA', 'Chicago': 'CHI', 'Houston': 'HOU'})
print(df)

# Applying a function to every element
df.loc[:, ['Name', 'City', 'City Code']] = df.loc[:, ['Name', 'City', 'City Code']].map(str.upper)
df

      Name  Age         City  Age in 10 Years City Code
0    ALICE   25     New York               35        NY
1      BOB   30  Los Angeles               40        LA
2  CHARLIE   35      Chicago               45       CHI
3    DAVID   40      Houston               50       HOU
      Name  Age         City  Age in 10 Years City Code
0    ALICE   25     New York               35        NY
1      BOB   30  Los Angeles               40        LA
2  CHARLIE   35      Chicago               45       CHI
3    DAVID   40      Houston               50       HOU


Unnamed: 0,Name,Age,City,Age in 10 Years,City Code
0,ALICE,25,NEW YORK,35,NY
1,BOB,30,LOS ANGELES,40,LA
2,CHARLIE,35,CHICAGO,45,CHI
3,DAVID,40,HOUSTON,50,HOU


In [23]:
# Example DataFrame
data = {'Date': ['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-02'],
        'City': ['New York', 'Los Angeles', 'New York', 'Los Angeles'],
        'Sales': [200, 150, 220, 130]}
df = pd.DataFrame(data)

# Pivoting data
pivot_df = df.pivot(index='Date', columns='City', values='Sales')


# Creating a pivot table
pivot_table_df = df.pivot_table(index='Date', columns='City', values='Sales', aggfunc='sum')
pivot_table_df.index


Index(['2023-01-01', '2023-01-02'], dtype='object', name='Date')

In [17]:
# Example wide DataFrame
wide_data = {'Date': ['2023-01-01', '2023-01-02'],
             'New York': [200, 220],
             'Los Angeles': [150, 130]}
wide_df = pd.DataFrame(wide_data)

# Melting data
melted_df = pd.melt(wide_df, id_vars=['Date'], var_name='City', value_name='Sales')
print(wide_df), print(melted_df)

         Date  New York  Los Angeles
0  2023-01-01       200          150
1  2023-01-02       220          130
         Date         City  Sales
0  2023-01-01     New York    200
1  2023-01-02     New York    220
2  2023-01-01  Los Angeles    150
3  2023-01-02  Los Angeles    130


(None, None)