In [None]:
pip install pandas numpy



## 데이터 병합

In [12]:
import pandas as pd
import numpy as np
from google.colab import files

# Upload the input CSV files through the Colab file picker
uploaded = files.upload()


def _read_uploaded_csv(uploaded_files, csv_name):
    """Load ``csv_name`` from the payloads returned by ``files.upload()``.

    Colab does not overwrite an existing file on re-upload; it saves the new
    copy under a suffixed name such as "vertex_counts (4).csv". Reading
    ``csv_name`` from disk would therefore silently return the data from the
    first upload. Parsing the in-memory bytes uses what was just uploaded.

    Args:
        uploaded_files: Mapping returned by ``files.upload()``.
            NOTE(review): assumed to be keyed by the original file name;
            if the key is absent the function falls back to the file on disk,
            which is what the previous code always did.
        csv_name: File name of the CSV to load.

    Returns:
        A pandas DataFrame parsed from the uploaded bytes, or from the file
        on disk when ``csv_name`` was not part of this upload.
    """
    import io  # local import keeps this cell self-contained

    payload = uploaded_files.get(csv_name)
    if payload is None:
        # Nothing uploaded under this name (e.g. dialog cancelled): use disk copy.
        return pd.read_csv(csv_name)
    return pd.read_csv(io.BytesIO(payload))


# Load the CSV files
distances = _read_uploaded_csv(uploaded, "white_pixel_center_distances.csv")
vertices = _read_uploaded_csv(uploaded, "vertex_counts.csv")

# Merge: attach each image's vertex_count to its row in the distances table
df = distances.merge(
    vertices[['file_name', 'vertex_count']],
    on='file_name',
    how='left'
)

# A left join can silently change the data in two ways; surface both.
extra_rows = len(df) - len(distances)
if extra_rows:
    # More rows than the left table means 'file_name' repeats in `vertices`.
    print(f"Warning: merge produced {extra_rows} extra row(s); "
          "'file_name' is duplicated in the vertex table.")
missing_vertices = int(df['vertex_count'].isna().sum())
if missing_vertices:
    print(f"Warning: {missing_vertices} row(s) have no vertex_count.")

# Add the analysis-friendly column names. 'distance' already carries its final
# name, so it needs no assignment (the old self-assignment was a no-op).
df = df.assign(size=df['white_pixel_count'], irregularity=df['vertex_count'])

# Keep only the required columns; .copy() makes final_df independent of df so
# later modifications cannot trigger pandas' chained-assignment warning.
final_df = df[['file_name', 'distance', 'size', 'irregularity']].copy()

# Preview the first rows of the selected columns
print("Updated DataFrame with Selected Columns:", final_df.head(), sep="\n")

# Step 4: write the merged table to disk and offer it as a browser download
features_csv = "patient_tumor_features.csv"
final_df.to_csv(features_csv, index=False)
files.download(features_csv)

print("\nMerged data saved as 'patient_tumor_features.csv'.")

Saving vertex_counts.csv to vertex_counts (4).csv
Saving white_pixel_center_distances.csv to white_pixel_center_distances (4).csv
Updated DataFrame with Selected Columns:
                           file_name   distance  size  irregularity
0  TCGA_DU_6407_19860514_23_pred.png  13.467167   616             8
1  TCGA_DU_6408_19860521_38_pred.png  12.436158   984            12
2  TCGA_DU_5855_19951217_19_pred.png  18.726413   933            10
3  TCGA_FG_7637_20000922_19_pred.png  22.640193   519             9
4  TCGA_DU_6408_19860521_23_pred.png  19.621488   178             9


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Merged data saved as 'patient_tumor_features.csv'.


## 위험도 점수 및 정규화된 위험도 계산


In [10]:
import pandas as pd
import numpy as np
from google.colab import files

import io  # parses the uploaded bytes; imported here so this cell runs on its own

# Upload the merged feature table through the Colab file picker
uploaded = files.upload()

# Load the CSV from the bytes that were just uploaded. Colab saves a re-upload
# under a suffixed name (e.g. "patient_tumor_features (3).csv"), so reading the
# fixed name from disk would silently return an older copy.
# NOTE(review): assumes `uploaded` is keyed by the original file name; when the
# key is absent this falls back to the file on disk, as the previous code did.
input_path = "patient_tumor_features.csv"
payload = uploaded.get(input_path)
data = pd.read_csv(io.BytesIO(payload) if payload is not None else input_path)

# Keep only the required columns (file_name, distance, size, irregularity)
data = data[['file_name', 'distance', 'size', 'irregularity']]

# Weights applied to distance (w1), size (w2) and irregularity (w3)
weights = {'w1': 0.4, 'w2': 0.4, 'w3': 0.2}
# Lower/upper bounds used to min-max scale the raw risk score.
# NOTE(review): the origin of 0.2 and 664.0 is not shown in this notebook —
# confirm they still match the data; scores outside the bounds are not clamped.
min_risk = 0.2
max_risk = 664.0

# Compute the weighted risk score and scale it to a percentage
def calculate_normalized_risk(distance, size, irregularity, weights, min_risk, max_risk):
    """Return the min-max normalized risk score, rounded to one decimal.

    The raw score is ``w1*distance + w2*size + w3*irregularity``. It is then
    mapped onto a 0-100 scale relative to ``[min_risk, max_risk]``; values
    outside those bounds are not clamped.

    Args:
        distance: Distance feature of one sample.
        size: Size feature of one sample.
        irregularity: Irregularity feature of one sample.
        weights: Mapping with the keys 'w1', 'w2' and 'w3'.
        min_risk: Raw score that maps to 0.
        max_risk: Raw score that maps to 100; must differ from ``min_risk``.

    Returns:
        The normalized score rounded to one decimal place.
    """
    distance_term = weights['w1'] * distance
    size_term = weights['w2'] * size
    irregularity_term = weights['w3'] * irregularity
    raw_score = distance_term + size_term + irregularity_term

    score_span = max_risk - min_risk
    fraction = (raw_score - min_risk) / score_span
    return round(fraction * 100, 1)

# Add the normalized score as a new column, computed row by row
def _row_risk(row):
    """Score one DataFrame row using the notebook-level weights and bounds."""
    return calculate_normalized_risk(
        row['distance'], row['size'], row['irregularity'],
        weights, min_risk, max_risk,
    )


data['normalized_risk_score'] = data.apply(_row_risk, axis=1)

# Preview the result
report_columns = ['file_name', 'distance', 'size', 'irregularity', 'normalized_risk_score']
print("Updated DataFrame:")
print(data[report_columns].head())

# Write the report to disk and offer it as a browser download
output_path = "patient_brain_tumor_report.csv"
data.to_csv(output_path, index=False, float_format="%.4f")
files.download(output_path)
print(f"\nCalculated features saved to {output_path}")


Saving patient_tumor_features.csv to patient_tumor_features (3).csv
Updated DataFrame:
                           file_name   distance  size  irregularity  \
0  TCGA_DU_6407_19860514_23_pred.png  13.467167   616             8   
1  TCGA_DU_6408_19860521_38_pred.png  12.436158   984            12   
2  TCGA_DU_5855_19951217_19_pred.png  18.726413   933            10   
3  TCGA_FG_7637_20000922_19_pred.png  22.640193   519             9   
4  TCGA_DU_6408_19860521_23_pred.png  19.621488   178             9   

   normalized_risk_score  
0                   38.1  
1                   60.4  
2                   57.6  
3                   32.9  
4                   12.1  


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Calculated features saved to patient_brain_tumor_report.csv
