In [None]:
pip install pandas numpy



In [None]:
import io

import pandas as pd
from google.colab import files

# Upload the three per-image feature CSVs from the local machine.
uploaded = files.upload()


def _read_uploaded_csv(uploaded, file_name):
    """Load ``file_name`` from the bytes just uploaded, else from disk.

    When a file with the same name already exists in the runtime, Colab
    stores the new upload under a renamed path such as ``name (1).csv``, so
    reading the bare name from disk would silently pick up the older file.
    ``files.upload()`` returns the uploaded contents keyed by the original
    file name, so those bytes are parsed directly when present.

    Args:
        uploaded: Mapping returned by ``files.upload()``.
        file_name: Name of the CSV to load.

    Returns:
        A DataFrame parsed from the uploaded bytes, or from the file on disk
        when ``file_name`` was not part of this upload.
    """
    if file_name in uploaded:
        return pd.read_csv(io.BytesIO(uploaded[file_name]))
    return pd.read_csv(file_name)


# Load the CSV files.
ratios = _read_uploaded_csv(uploaded, "filtered_white_pixel_ratios.csv")
distances = _read_uploaded_csv(uploaded, "white_pixel_center_distances.csv")
vertices = _read_uploaded_csv(uploaded, "vertex_counts.csv")

# Step 2: merge. Left-join the ratio and vertex-count columns onto the
# distance table by file name, so every row of `distances` is kept.
merged = distances.merge(
    ratios[['file_name', 'white_pixel_ratio', 'total_pixels']],
    on='file_name',
    how='left'
).merge(
    vertices[['file_name', 'vertex_count']],
    on='file_name',
    how='left'
)

# A left join only changes the row count when a right-hand table repeats a
# key; surface that instead of silently writing duplicated rows.
if len(merged) != len(distances):
    print(
        f"\nWarning: merge changed the row count from {len(distances)} to "
        f"{len(merged)}; check for duplicate 'file_name' values."
    )

# Step 3: inspect the merged result.
print("\nMerged Data Preview:")
print(merged.head())

print("\nMerged Data Info:")
# info() writes its report to stdout and returns None, so wrapping it in
# print() would emit an extra "None" line.
merged.info()

# Step 4: save the result and offer it as a browser download.
merged.to_csv("patient_tumor_features.csv", index=False)
files.download("patient_tumor_features.csv")

print("\nMerged data saved as 'patient_tumor_features.csv'.")

Saving vertex_counts.csv to vertex_counts (1).csv
Saving filtered_white_pixel_ratios.csv to filtered_white_pixel_ratios (1).csv
Saving white_pixel_center_distances.csv to white_pixel_center_distances (1).csv

Merged Data Preview:
                           file_name  tumor_center_x  tumor_center_y  \
0  TCGA_DU_6407_19860514_23_pred.png       50.644481       65.730519   
1  TCGA_DU_6408_19860521_38_pred.png       54.272358       56.252033   
2  TCGA_DU_5855_19951217_19_pred.png       54.092176       48.109325   
3  TCGA_FG_7637_20000922_19_pred.png       41.479769       61.672447   
4  TCGA_DU_6408_19860521_23_pred.png       46.573034       54.983146   

   image_center_x  image_center_y  white_pixel_count   distance  \
0              64              64                616  13.467167   
1              64              64                984  12.436158   
2              64              64                933  18.726413   
3              64              64                519  22.640193   
4 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Merged data saved as 'patient_tumor_features.csv'.


## 변수 Distance, Size, Irregularity 계산 및 선택

In [None]:
import io

import numpy as np
import pandas as pd
from google.colab import files

# Upload the merged feature table.
uploaded = files.upload()

# Load the CSV. Colab saves an upload under a renamed path ("name (2).csv")
# when the name is already taken in the runtime, so reading the bare name
# from disk could return an older file. Prefer the bytes returned by
# files.upload(), which are keyed by the original file name, and fall back
# to the file on disk only when nothing with that name was uploaded.
features_csv = "patient_tumor_features.csv"
if features_csv in uploaded:
    df = pd.read_csv(io.BytesIO(uploaded[features_csv]))
else:
    df = pd.read_csv(features_csv)
print("Original DataFrame:")
print(df.head())

# Distance helper
def calculate_distance(row):
    """Return the tumor-center to image-center distance for one row.

    Args:
        row: Mapping-like record providing 'tumor_center_x',
            'tumor_center_y', 'image_center_x' and 'image_center_y'.

    Returns:
        The Euclidean distance between the two centers, rounded to three
        decimal places.
    """
    offset_x = row['tumor_center_x'] - row['image_center_x']
    offset_y = row['tumor_center_y'] - row['image_center_y']
    squared_length = offset_x**2 + offset_y**2
    return round(np.sqrt(squared_length), 3)

# Derive the three features in one step:
#   distance     - recomputed per row by calculate_distance (3-decimal rounding)
#   size         - white_pixel_count, used as-is
#   irregularity - vertex_count, used as-is
df = df.assign(
    distance=df.apply(calculate_distance, axis=1),
    size=df['white_pixel_count'],
    irregularity=df['vertex_count'],
)

# Keep only the columns needed downstream.
selected_columns = ['file_name', 'distance', 'size', 'irregularity']
final_df = df[selected_columns]

# Preview the result.
print("Updated DataFrame with Selected Columns:")
print(final_df.head())

# Write the result to CSV and offer it as a browser download.
output_path = "calculated_tumor_features.csv"
final_df.to_csv(output_path, index=False)
files.download(output_path)
print(f"\nCalculated features saved to {output_path}")


Saving patient_tumor_features.csv to patient_tumor_features (2).csv
Original DataFrame:
                           file_name  tumor_center_x  tumor_center_y  \
0  TCGA_DU_6407_19860514_23_pred.png       50.644481       65.730519   
1  TCGA_DU_6408_19860521_38_pred.png       54.272358       56.252033   
2  TCGA_DU_5855_19951217_19_pred.png       54.092176       48.109325   
3  TCGA_FG_7637_20000922_19_pred.png       41.479769       61.672447   
4  TCGA_DU_6408_19860521_23_pred.png       46.573034       54.983146   

   image_center_x  image_center_y  white_pixel_count   distance  \
0              64              64                616  13.467167   
1              64              64                984  12.436158   
2              64              64                933  18.726413   
3              64              64                519  22.640193   
4              64              64                178  19.621488   

   white_pixel_ratio  total_pixels  vertex_count  
0           0.037598     

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Calculated features saved to calculated_tumor_features.csv


## 위험도 점수 및 정규화된 위험도 계산

In [None]:
import io

import numpy as np
import pandas as pd
from google.colab import files

# Upload the calculated feature table.
uploaded = files.upload()

# Load the CSV. Colab saves an upload under a renamed path ("name (3).csv")
# when the name is already taken in the runtime, so reading the bare name
# from disk could return an older file. Prefer the bytes returned by
# files.upload(), which are keyed by the original file name, and fall back
# to the file on disk only when nothing with that name was uploaded.
features_csv = "calculated_tumor_features.csv"
if features_csv in uploaded:
    data = pd.read_csv(io.BytesIO(uploaded[features_csv]))
else:
    data = pd.read_csv(features_csv)

# Keep only the required columns (file_name, distance, size, irregularity).
data = data[['file_name', 'distance', 'size', 'irregularity']]

# Weight configuration. calculate_normalized_risk applies w1 to distance,
# w2 to size and w3 to irregularity.
weights = dict(w1=0.4, w2=0.4, w3=0.2)
# Lower/upper bounds of the raw score used to rescale it to a percentage.
# NOTE(review): these are hard-coded; presumably the extreme raw scores
# observed on the dataset - confirm before reusing with different data.
min_risk, max_risk = 0.2, 664.0

# Risk score and normalized risk score helper
def calculate_normalized_risk(distance, size, irregularity, weights, min_risk, max_risk):
    """Return the weighted risk score rescaled to a percentage.

    The raw score is ``w1 * distance + w2 * size + w3 * irregularity``. It is
    then min-max scaled with ``min_risk`` / ``max_risk`` and multiplied by 100.

    Args:
        distance: Distance feature value.
        size: Size feature value.
        irregularity: Irregularity feature value.
        weights: Mapping with keys 'w1', 'w2' and 'w3'.
        min_risk: Raw score that maps to 0.
        max_risk: Raw score that maps to 100.

    Returns:
        The scaled score rounded to one decimal place. Raw scores outside
        ``[min_risk, max_risk]`` are not clipped.
    """
    distance_term = weights['w1'] * distance
    size_term = weights['w2'] * size
    irregularity_term = weights['w3'] * irregularity
    raw_score = distance_term + size_term + irregularity_term

    score_span = max_risk - min_risk
    scaled_fraction = (raw_score - min_risk) / score_span
    return round(scaled_fraction * 100, 1)

# New column: normalized risk score, computed one row at a time.
def _risk_for_row(row):
    """Score a single row with the notebook-level weights and bounds."""
    return calculate_normalized_risk(
        row['distance'],
        row['size'],
        row['irregularity'],
        weights,
        min_risk,
        max_risk,
    )


data['normalized_risk_score'] = data.apply(_risk_for_row, axis=1)

# Preview the result.
preview_columns = ['file_name', 'distance', 'size', 'irregularity', 'normalized_risk_score']
print("Updated DataFrame:")
print(data[preview_columns].head())

# Write the result to CSV (floats with four decimals) and offer it as a
# browser download.
output_path = "patient_tumor_risk_scores.csv"
data.to_csv(output_path, index=False, float_format="%.4f")
files.download(output_path)
print(f"\nCalculated features saved to {output_path}")


Saving calculated_tumor_features.csv to calculated_tumor_features (3).csv
Updated DataFrame:
                           file_name  distance  size  irregularity  \
0  TCGA_DU_6407_19860514_23_pred.png    13.467   616             8   
1  TCGA_DU_6408_19860521_38_pred.png    12.436   984            12   
2  TCGA_DU_5855_19951217_19_pred.png    18.726   933            10   
3  TCGA_FG_7637_20000922_19_pred.png    22.640   519             9   
4  TCGA_DU_6408_19860521_23_pred.png    19.621   178             9   

   normalized_risk_score  
0                   38.1  
1                   60.4  
2                   57.6  
3                   32.9  
4                   12.1  


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Calculated features saved to patient_tumor_risk_scores.csv
