<a href="https://colab.research.google.com/github/Oatthapong/Python/blob/main/Data_Transformation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# นำเข้า Library และสร้างข้อมูล

In [None]:
import pandas as pd

In [None]:
# Sample customer records, laid out column by column.
# purchase_amount is stored as strings here; a later cell converts it to numbers.
data = dict(
    customer_id=[101, 102, 103, 104, 105],
    name=["Alice", "Bob", "Charlie", "David", "Eve"],
    age=[25, 30, 35, 40, 45],
    gender=["F", "M", "M", "M", "F"],
    purchase_amount=["10000", "15000", "20000", "25000", "30000"],
)

# Build the working DataFrame and show its contents.
df = pd.DataFrame(data)
print(df)

   customer_id     name  age gender purchase_amount
0          101    Alice   25      F           10000
1          102      Bob   30      M           15000
2          103  Charlie   35      M           20000
3          104    David   40      M           25000
4          105      Eve   45      F           30000


In [None]:
# Print the Python type of df to confirm it is a pandas DataFrame.
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [None]:
# Print a summary of df: index range, column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   customer_id      5 non-null      int64 
 1   name             5 non-null      object
 2   age              5 non-null      int64 
 3   gender           5 non-null      object
 4   purchase_amount  5 non-null      object
dtypes: int64(2), object(3)
memory usage: 332.0+ bytes


# เปลี่ยนประเภทข้อมูล (Data Type Conversion)

In [None]:
# Convert purchase_amount from strings to a numeric dtype.
df["purchase_amount"] = df["purchase_amount"].pipe(pd.to_numeric)
print(df)

   customer_id     name  age gender  purchase_amount
0          101    Alice   25      F            10000
1          102      Bob   30      M            15000
2          103  Charlie   35      M            20000
3          104    David   40      M            25000
4          105      Eve   45      F            30000


# แปลงค่าข้อมูล (Mapping & Encoding)

In [None]:
# List the distinct values that occur in the gender column.
print(pd.unique(df["gender"]))

['F' 'M']


In [None]:
# Encode gender as integers (F -> 0, M -> 1).
gender_codes = {"F": 0, "M": 1}

# Series.map() turns every value that is not a key of the mapping into NaN, so
# mapping a column that already holds 0/1 would wipe it out. Only encode while
# raw labels are still present, which keeps this cell safe to run again.
if df["gender"].isin(list(gender_codes)).any():
    df["gender"] = df["gender"].map(gender_codes)
print(df)

   customer_id     name  age  gender  purchase_amount
0          101    Alice   25       0            10000
1          102      Bob   30       1            15000
2          103  Charlie   35       1            20000
3          104    David   40       1            25000
4          105      Eve   45       0            30000


# รวมข้อมูล (Merging & Concatenation)

In [None]:
# Build a second DataFrame holding two more customers.
# gender uses the same integer codes as df (0 = F, 1 = M); string codes such as
# "1" would turn the combined gender column into a mixed int/str object column.
data2 = {
    "customer_id": [106, 107],
    "name": ["Frank", "Grace"],
    "age": [50, 55],
    "gender": [1, 0],
    "purchase_amount": [35000, 40000]
}
df2 = pd.DataFrame(data2)

# Stack the new rows under the existing ones and renumber the index from 0.
df_combined = pd.concat([df, df2], ignore_index=True)
print(df_combined)

   customer_id     name  age gender  purchase_amount
0          101    Alice   25      0            10000
1          102      Bob   30      1            15000
2          103  Charlie   35      1            20000
3          104    David   40      1            25000
4          105      Eve   45      0            30000
5          106    Frank   50      1            35000
6          107    Grace   55      0            40000


# ปรับโครงสร้างข้อมูล (Pivot & Melt)

In [None]:
# Reshape from wide to long: customer_id and name stay as identifier columns,
# and every other column becomes an (Attribute, Value) pair on its own row.
df_melted = pd.melt(
    df,
    id_vars=["customer_id", "name"],
    var_name="Attribute",
    value_name="Value",
)
print(df_melted)

    customer_id     name        Attribute  Value
0           101    Alice              age     25
1           102      Bob              age     30
2           103  Charlie              age     35
3           104    David              age     40
4           105      Eve              age     45
5           101    Alice           gender      0
6           102      Bob           gender      1
7           103  Charlie           gender      1
8           104    David           gender      1
9           105      Eve           gender      0
10          101    Alice  purchase_amount  10000
11          102      Bob  purchase_amount  15000
12          103  Charlie  purchase_amount  20000
13          104    David  purchase_amount  25000
14          105      Eve  purchase_amount  30000


# ใช้ Aggregation เพื่อสรุปข้อมูล

In [None]:
# Average purchase amount for each gender code, returned as a flat two-column table.
df_summary = df.groupby("gender", as_index=False)["purchase_amount"].mean()
print(df_summary)

   gender  purchase_amount
0       0          20000.0
1       1          20000.0


In [None]:
# Total purchase amount for each distinct age.
df_agepurchaseamount = (
    df.groupby("age")["purchase_amount"]
    .agg("sum")
    .reset_index()
)
print(df_agepurchaseamount)

   age  purchase_amount
0   25            10000
1   30            15000
2   35            20000
3   40            25000
4   45            30000


In [None]:
# Age-group labels and the bin edges that define them.
# pd.cut uses right-closed intervals by default, so these edges produce
# (0, 29], (29, 40] and (40, inf], i.e. <30, 30-40 and >40 for whole-number ages.
labels = ["<30", "30-40", ">40"]
bins = [0, 29, 40, float("inf")]

# Add an age_group column that assigns each row to one of the groups.
df["age_group"] = pd.cut(x=df["age"], bins=bins, labels=labels, right=True)

# Show the DataFrame including the new age_group column.
print(df)

   customer_id     name  age  gender  purchase_amount age_group
0          101    Alice   25       0            10000       <30
1          102      Bob   30       1            15000     30-40
2          103  Charlie   35       1            20000     30-40
3          104    David   40       1            25000     30-40
4          105      Eve   45       0            30000       >40


In [None]:
# Average purchase amount per age group.
# age_group is a categorical column (it comes from pd.cut with labels), and the
# implicit default of groupby's `observed` argument is deprecated and differs
# between pandas versions. Passing observed=False explicitly keeps every age
# group in the result (empty groups show NaN) and avoids the FutureWarning.
df_grouped = (
    df.groupby("age_group", observed=False)["purchase_amount"]
    .mean()
    .reset_index()
)

# Show the result.
print(df_grouped)

  age_group  purchase_amount
0       <30          10000.0
1     30-40          20000.0
2       >40          30000.0


`df_grouped = df.groupby("age_group")["purchase_amount"].mean().reset_index()`
