In [None]:
import pandas as pd

# Raw sample records, keyed by column name
data = dict(
    name=['Amy', 'John', 'Bob', 'Alice', 'Eva'],
    age=[25, 30, 35, 28, 32],
    gender=['Female', 'Male', 'Male', 'Female', 'Female'],
    salary=[50000, 60000, 55000, 52000, 58000],
)

# Build the DataFrame from the column dictionary
df = pd.DataFrame(data=data)

# 1) Preview the first four rows
print("====================================1")
print(df.head(n=4))

# 2) Dimensions of the frame as a (rows, columns) tuple
print("====================================2")
print('Shape:', df.shape)

# 3) Basic information about the frame; info() prints its report itself
print("====================================3")
df.info()

# 4) Descriptive statistics
print("====================================4")
print(df.describe())


In [None]:
print("====================================5")   # Select every row of a single column
print(df.loc[:, 'name'])    # df['name'] is an equivalent shorthand

print("====================================6")   # Keep only the rows whose age is greater than 30
print(df.loc[df['age'] > 30])

print("====================================7")   # Fill missing salaries with the column mean
# Assign the filled column back rather than calling fillna(inplace=True) on the
# df['salary'] selection: the in-place call acts on an intermediate object,
# which pandas warns about and which does not update df under Copy-on-Write.
df['salary'] = df['salary'].fillna(df['salary'].mean())
print(df)

print("====================================8")   # Drop rows that still contain missing values
df = df.dropna()
print(df)


In [None]:
# 9) Mean salary within each gender group
print("====================================9")
average_salary_by_gender = df.groupby(by='gender')['salary'].mean()
print(average_salary_by_gender)

# 10) Rows ordered by salary, highest first
print("====================================10")
df_sorted = df.sort_values(by='salary', ascending=False)
print(df_sorted)

# Closing separator for this group of examples
print("====================================11")

In [None]:
# Methods and attributes covered in the cells above:
# head()
# shape
# info()
# describe()
# loc
# fillna()
# dropna()
# groupby()
# sort_values()

In [None]:
import pandas as pd

# Two small frames that share 'key_column'; only keys 'B' and 'D' appear in both
df1 = pd.DataFrame(data={
    'key_column': ['A', 'B', 'C', 'D'],
    'value_column1': [1, 2, 3, 4],
})
df2 = pd.DataFrame(data={
    'key_column': ['B', 'D', 'E', 'F'],
    'value_column2': [5, 6, 7, 8],
})

# 12) Merge the two frames on the shared key column
print("====================================12")
merged_df = pd.merge(left=df1, right=df2, on='key_column')
print(merged_df)

# 13) Concatenate the two frames row-wise; columns missing from one frame become NaN
print("====================================13")
concatenated_df = pd.concat(objs=[df1, df2], axis='index')
print(concatenated_df)

# 14) Pivot the merged frame: one row per key, averaging each value column
print("====================================14")
pivot_table = merged_df.pivot_table(
    index='key_column',
    values=['value_column1', 'value_column2'],
    aggfunc='mean',
)
print(pivot_table)

# 15) Replace the NaN cells left by the concatenation with 0
print("====================================15")
concatenated_df = concatenated_df.fillna(value=0)
print(concatenated_df)

In [None]:
# 16) Write the pivot table to a CSV file
print("====================================16")
pivot_table.to_csv(path_or_buf='pivot_table.csv')

# 17) Flag rows that repeat an earlier row, then show only the flagged rows
print("====================================17")
duplicated_rows = concatenated_df.duplicated()
print('Duplicated Rows:')
print(concatenated_df.loc[duplicated_rows])

# 18) Remove the repeated rows
print("====================================18")
concatenated_df = concatenated_df.drop_duplicates()
print('DataFrame after dropping duplicates:')
print(concatenated_df)


In [None]:
# pd.merge() joins df1 and df2 on key_column, producing the merged DataFrame merged_df.
# pd.concat() stacks df1 and df2 row-wise (axis=0), producing the concatenated DataFrame concatenated_df.
# pivot_table() pivots merged_df on key_column, producing pivot_table with the two value columns.
# fillna() replaces the missing values in concatenated_df (not pivot_table) with 0.
# to_csv() writes pivot_table to a CSV file.
# duplicated() flags repeated rows in concatenated_df, and drop_duplicates() removes them.

In [None]:
#新增欄位

import pandas as pd

# Sample employee records, keyed by column name
data = dict(
    name=['Amy', 'John', 'Bob', 'Alice', 'Eva'],
    age=[25, 30, 35, 28, 32],
    gender=['Female', 'Male', 'Male', 'Female', 'Female'],
    salary=[50000, 60000, 55000, 52000, 58000],
)
df = pd.DataFrame(data=data)

# Add a new column: bonus is 10% of salary
df['bonus'] = df['salary'].mul(0.1)
print("\nDataFrame with New Column:", df, sep="\n")

In [2]:
#網路範例

import pandas as pd

# Build a DataFrame from a CSV file hosted online (requires network access)
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
df = pd.read_csv(url)

# Show the first few rows of the DataFrame
print("DataFrame:")
print(df.head())

# Show basic information about the DataFrame.
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line afterwards.
print("\nDataFrame Info:")
df.info()




DataFrame:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN   

In [3]:
# Descriptive statistics of the frame
print("\nDescriptive Statistics:", df.describe(), sep="\n")

# Select every row of one specific column
print("\nSelected Column:", df.loc[:, 'Name'], sep="\n")


Descriptive Statistics:
       PassengerId    Survived      Pclass         Age       SibSp  \
count   891.000000  891.000000  891.000000  714.000000  891.000000   
mean    446.000000    0.383838    2.308642   29.699118    0.523008   
std     257.353842    0.486592    0.836071   14.526497    1.102743   
min       1.000000    0.000000    1.000000    0.420000    0.000000   
25%     223.500000    0.000000    2.000000   20.125000    0.000000   
50%     446.000000    0.000000    3.000000   28.000000    0.000000   
75%     668.500000    1.000000    3.000000   38.000000    1.000000   
max     891.000000    1.000000    3.000000   80.000000    8.000000   

            Parch        Fare  
count  891.000000  891.000000  
mean     0.381594   32.204208  
std      0.806057   49.693429  
min      0.000000    0.000000  
25%      0.000000    7.910400  
50%      0.000000   14.454200  
75%      0.000000   31.000000  
max      6.000000  512.329200  

Selected Column:
0                                Braun

In [4]:
# Conditional filter: keep only the rows whose Age is greater than 30
filtered_df = df.loc[df['Age'] > 30]
print("\nFiltered DataFrame:", filtered_df, sep="\n")

# Add a new column: FamilySize = SibSp + Parch + 1
# (the +1 presumably counts the passenger themselves - confirm against the dataset docs)
df['FamilySize'] = df['SibSp'].add(df['Parch']) + 1
print("\nDataFrame with New Column:", df, sep="\n")


Filtered DataFrame:
     PassengerId  Survived  Pclass  \
1              2         1       1   
3              4         1       1   
4              5         0       3   
6              7         0       1   
11            12         1       1   
..           ...       ...     ...   
873          874         0       3   
879          880         1       1   
881          882         0       3   
885          886         0       3   
890          891         0       3   

                                                  Name     Sex   Age  SibSp  \
1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                             Allen, Mr. William Henry    male  35.0      0   
6                              McCarthy, Mr. Timothy J    male  54.0      0   
11                            Bonnell, Miss. Elizabeth  female  58.0      0   
..                                              

In [5]:
# Order the rows by the Fare column, highest first
sorted_df = df.sort_values(by='Fare', ascending=False)
print("\nSorted DataFrame:", sorted_df, sep="\n")

# Mean Age within each Pclass group (the result is a Series indexed by Pclass)
grouped_df = df.groupby(by='Pclass')['Age'].mean()
print("\nGrouped DataFrame:", grouped_df, sep="\n")


Sorted DataFrame:
     PassengerId  Survived  Pclass                                Name  \
258          259         1       1                    Ward, Miss. Anna   
737          738         1       1              Lesurer, Mr. Gustave J   
679          680         1       1  Cardeza, Mr. Thomas Drake Martinez   
88            89         1       1          Fortune, Miss. Mabel Helen   
27            28         0       1      Fortune, Mr. Charles Alexander   
..           ...       ...     ...                                 ...   
633          634         0       1       Parr, Mr. William Henry Marsh   
413          414         0       2      Cunningham, Mr. Alfred Fleming   
822          823         0       1     Reuchlin, Jonkheer. John George   
732          733         0       2                Knight, Mr. Robert J   
674          675         0       2          Watson, Mr. Ennis Hastings   

        Sex   Age  SibSp  Parch    Ticket      Fare        Cabin Embarked  \
258  female  35