In [None]:
# Inspect the contents of train.csv
import pandas as pd

# Report per-column dtype, missing count/rate, unique count, and the most frequent value with its count.
def data_quality_status(df):
    """Summarize the data quality of every column in *df*.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A DataFrame with one row per column of *df* and these columns:
        'Data Type', 'Missing Values', 'Missing (%)', 'Unique Values',
        'Most Frequent Value' and 'Frequency of Most Frequent Value'.
        A column that holds no non-missing value reports None as its most
        frequent value and 0 as the frequency.
    """
    # Count missing values once and reuse the result for the percentage.
    null_count = df.isnull().sum()
    null_percent = (null_count / len(df)) * 100

    most_freq_vals = []
    most_freq_counts = []
    for _, column in df.items():
        # value_counts() drops missing values by default and sorts by
        # descending count, so an empty result means "nothing but missing
        # values" and the first entry is the most frequent value.
        counts = column.value_counts()
        if counts.empty:
            most_freq_vals.append(None)
            most_freq_counts.append(0)
        else:
            most_freq_vals.append(counts.index[0])
            most_freq_counts.append(counts.iloc[0])

    # Collect everything into a single summary frame.
    return pd.DataFrame({
        'Data Type': df.dtypes,
        'Missing Values': null_count,
        'Missing (%)': null_percent,
        'Unique Values': df.nunique(),
        'Most Frequent Value': most_freq_vals,
        'Frequency of Most Frequent Value': most_freq_counts,
    })

# Apply data_quality_status to train.csv
train_index_path = "../Main_Dataset/train.csv"
df_train_index = pd.read_csv(train_index_path)

# Besides the data_quality_status summary, show the data size and the first rows
print(f"データのサイズ: {df_train_index.shape}")    
print("\n=== 先頭5行のデータ ===")
display(df_train_index.head())
display(data_quality_status(df_train_index))


データのサイズ: (977, 3)

=== 先頭5行のデータ ===


Unnamed: 0,id,fs,sig_len
0,7663343,500,5000
1,10140238,1000,10000
2,11842146,1000,10000
3,19030958,250,2500
4,19585145,512,5120


Unnamed: 0,Data Type,Missing Values,Missing (%),Unique Values,Most Frequent Value,Frequency of Most Frequent Value
id,int64,0,0.0,977,7663343,1
fs,int64,0,0.0,6,500,163
sig_len,int64,0,0.0,6,5000,163
