In [96]:
import pandas as pd
import numpy as np

# Sample records with deliberately missing values: the second record is
# entirely NaN and the third record lacks both test scores.
raw_data = dict(
    first_name=['Jason', np.nan, 'Tina', 'Jake', 'Amy'],
    last_name=['Miller', np.nan, 'Ali', 'Milner', 'Cooze'],
    age=[42, np.nan, 36, 24, 73],
    sex=['m', np.nan, 'f', 'm', 'f'],
    preTestScore=[4, np.nan, np.nan, 2, 3],
    postTestScore=[25, np.nan, np.nan, 62, 70],
)

# The dict's insertion order is already the desired column order,
# so no explicit column list is needed.
df = pd.DataFrame(raw_data)

# Display the frame.
df

Unnamed: 0,first_name,last_name,age,sex,preTestScore,postTestScore
0,Jason,Miller,42.0,m,4.0,25.0
1,,,,,,
2,Tina,Ali,36.0,f,,
3,Jake,Milner,24.0,m,2.0,62.0
4,Amy,Cooze,73.0,f,3.0,70.0


In [97]:
# Fraction of missing values per column (mean of the boolean NaN mask).
df.isna().mean()

first_name       0.2
last_name        0.2
age              0.2
sex              0.2
preTestScore     0.4
postTestScore    0.4
dtype: float64

In [98]:
# Drop only the rows in which every column is missing.
df_cleaned = df.dropna(axis='index', how='all')
df_cleaned

Unnamed: 0,first_name,last_name,age,sex,preTestScore,postTestScore
0,Jason,Miller,42.0,m,4.0,25.0
2,Tina,Ali,36.0,f,,
3,Jake,Milner,24.0,m,2.0,62.0
4,Amy,Cooze,73.0,f,3.0,70.0


In [99]:
# Drop every row that has at least one missing value.
df_cleaned = df.dropna(axis='index', how='any')
df_cleaned

Unnamed: 0,first_name,last_name,age,sex,preTestScore,postTestScore
0,Jason,Miller,42.0,m,4.0,25.0
3,Jake,Milner,24.0,m,2.0,62.0
4,Amy,Cooze,73.0,f,3.0,70.0


In [100]:
# Add a 'location' column that is missing for every row
# (this modifies df in place).
df.loc[:, 'location'] = np.nan
df

Unnamed: 0,first_name,last_name,age,sex,preTestScore,postTestScore,location
0,Jason,Miller,42.0,m,4.0,25.0,
1,,,,,,,
2,Tina,Ali,36.0,f,,,
3,Jake,Milner,24.0,m,2.0,62.0,
4,Amy,Cooze,73.0,f,3.0,70.0,


In [101]:
# Replace every missing value with 0 and display the result; df itself is
# not reassigned. Note that the text columns receive 0 as well.
df.fillna(value=0)

Unnamed: 0,first_name,last_name,age,sex,preTestScore,postTestScore,location
0,Jason,Miller,42.0,m,4.0,25.0,0.0
1,0,0,0.0,0,0.0,0.0,0.0
2,Tina,Ali,36.0,f,0.0,0.0,0.0
3,Jake,Milner,24.0,m,2.0,62.0,0.0
4,Amy,Cooze,73.0,f,3.0,70.0,0.0


In [102]:
# Edge list: one row per edge, with an integer weight and a color label.
edge_columns = dict(
    source=[0, 1, 2],
    target=[2, 2, 3],
    weight=[3, 4, 5],
    color=['red', 'blue', 'blue'],
)
edges = pd.DataFrame(edge_columns)

# Print the frame as plain text.
print(edges)

   source  target  weight color
0       0       2       3   red
1       1       2       4  blue
2       2       3       5  blue


In [103]:
# Inspect the dtype of each column of the edges frame.
edges.dtypes

source     int64
target     int64
weight     int64
color     object
dtype: object

In [106]:
# One-hot encode the 'color' column, then cast the whole frame to integers
# so the indicator columns are shown as 0/1.
edges_dummies = pd.get_dummies(edges, columns=['color']).astype(int)
edges_dummies

Unnamed: 0,source,target,weight,color_blue,color_red
0,0,2,3,0,1
1,1,2,4,1,0
2,2,3,5,1,0


In [107]:
import pandas as pd

# Lookup table translating numeric weights into size labels.
weight_dict = {3: "M", 4: "L", 5: "XL"}

# Example edge list with a derived 'weight_sign' column obtained by
# mapping each weight through the lookup table.
edges = pd.DataFrame({
    'source': [0, 1, 2],
    'target': [2, 2, 3],
    'weight': [3, 4, 5],
}).assign(weight_sign=lambda frame: frame["weight"].map(weight_dict))

# One-hot encode the size labels into one indicator column per label.
weight_sign = pd.get_dummies(edges["weight_sign"])

# Print the indicator frame as plain text.
print(weight_sign)


       L      M     XL
0  False   True  False
1   True  False  False
2  False  False   True


In [108]:
# Place the one-hot indicator columns next to the original edge columns.
pd.concat([edges, weight_sign], axis='columns')

Unnamed: 0,source,target,weight,weight_sign,L,M,XL
0,0,2,3,M,False,True,False
1,1,2,4,L,True,False,False
2,2,3,5,XL,False,False,True


In [109]:
import pandas as pd

# Raw records: ten soldiers across three regiments, with pre/post test scores.
raw_data = dict(
    regiment=['Nighthawks'] * 4 + ['Dragoons'] * 3 + ['Scouts'] * 3,
    company=['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '1st', '2nd', '2nd'],
    name=['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze',
          'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger'],
    preTestScore=[4, 24, 31, 2, 3, 4, 24, 31, 2, 3],
    postTestScore=[25, 94, 57, 62, 70, 25, 94, 57, 62, 70],
)

# The dict's insertion order is already the desired column order.
df = pd.DataFrame(raw_data)

# Print the frame as plain text.
print(df)


     regiment company      name  preTestScore  postTestScore
0  Nighthawks     1st    Miller             4             25
1  Nighthawks     1st  Jacobson            24             94
2  Nighthawks     2nd       Ali            31             57
3  Nighthawks     2nd    Milner             2             62
4    Dragoons     1st     Cooze             3             70
5    Dragoons     1st     Jacon             4             25
6    Dragoons     2nd    Ryaner            24             94
7      Scouts     1st      Sone            31             57
8      Scouts     2nd     Sloan             2             62
9      Scouts     2nd     Piger             3             70


In [111]:

# Bin edges for the score ranges: 0-25, 25-50, 50-75, 75-100.
bins = [0, 25, 50, 75, 100]

# Grade label for each bin, from the lowest range to the highest.
group_names = ['D', 'C', 'B', 'A']

# Assign each postTestScore to its grade. Bins are right-inclusive, so
# without include_lowest=True the first bin would be (0, 25] and a score of
# exactly 0 would silently become NaN; include_lowest closes the left edge.
categories = pd.cut(
    df['postTestScore'],
    bins,
    labels=group_names,
    include_lowest=True,
)

# Print the resulting categorical series as plain text.
print(categories)

0    D
1    A
2    B
3    B
4    B
5    D
6    A
7    B
8    B
9    B
Name: postTestScore, dtype: category
Categories (4, object): ['D' < 'C' < 'B' < 'A']
