# Anonymous Records in the Chu San Zang Ji Ji

In [None]:
!pip install pandas matplotlib

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the catalogue table into `df`, which every later cell reads.
# Use the "raw" URL instead of the "blob" URL
raw_url = 'https://raw.githubusercontent.com/Silk-Road-Corpus/silk_road_corpus/main/data/chusanzangjiji.csv'

try:
    df = pd.read_csv(raw_url)
except Exception as e:
    # Report the problem, then re-raise: without `df` the remaining cells
    # would only fail later with an unrelated-looking NameError.
    print(f"An error occurred: {e}")
    raise
else:
    print("Successfully loaded the data!")



In [None]:
# Keep only the rows catalogued in fascicle 3 or 4; the cells below report
# on this subset as the "anonymous records".
filtered_df = df[df['CSZJJ Fascicle'].isin([3, 4])]

In [None]:
# Report how many rows are in the fascicle 3/4 subset.
print(f'Total number of anonymous records (fascicle 3 or 4): {filtered_df.shape[0]}')

In [None]:
# Break the records down by fascicle.
# Fascicle 3: count rows per section. value_counts() lists the sections by
# descending count and leaves out rows whose section number is missing.
df3 = df[df['CSZJJ Fascicle'] == 3]
section_counts_df3 = df3['CSZJJ Section No.'].value_counts()
print('Number of records in fascicle 3 sections:')
print(section_counts_df3)
# Fascicle 4: only the overall total is reported.
df4 = df[df['CSZJJ Fascicle'] == 4]
print(f'Number of records in fascicle 4: {len(df4)}')


In [None]:
# Select the fascicle 3/4 rows that have any value in the "lost or not seen"
# column. NOTE(review): every non-null entry is counted, whatever it says —
# confirm the column is left empty (not e.g. False/"no") for extant texts.
lost_df = filtered_df.loc[filtered_df['CSZJJ mentions lost or not seen'].notna()]
print(f'Number of anonymous records lost or not seen: {lost_df.shape[0]}')

In [None]:
# Select the fascicle 3/4 rows that carry a modern collection reference
# (any non-null value in that column).
modern_canon_df = filtered_df.loc[filtered_df['Modern Collection Ref'].notna()]
print(f'Number of anonymous records related to a modern collection: {modern_canon_df.shape[0]}')

In [None]:
# Flag the records whose modern collection reference mentions "T 154".
# The pattern requires a non-digit (or the end of the string) right after
# "154": a plain substring search for 'T 154' would also match longer
# numbers such as "T 1545" and inflate the count.
t154_series = modern_canon_df['Modern Collection Ref'].str.contains(r'T 154(?:\D|$)', na=False)
print(f'Number of anonymous records related to T 154: {t154_series.sum()}')
# Despite its name, t154_df is a Series: the matching references, sorted.
t154_df = modern_canon_df.loc[t154_series, 'Modern Collection Ref'].sort_values()
print('List of anonymous records related to T 154:')
# Keep this as the cell's last expression so the notebook displays it;
# head(40) shows at most the first 40 references.
t154_df.head(40)

In [None]:
# Bar chart of the fascicle 3/4 records per Taishō classification, limited
# to classifications that occur more than five times.
counts = filtered_df['Taishō classification'].value_counts()
filtered_counts = counts.loc[counts > 5]

fig, ax = plt.subplots(figsize=(12, 6))
filtered_counts.plot.bar(ax=ax)
ax.set_title('Number of Records by Taishō Classification (count > 5)')
ax.set_xlabel('Taishō Classification')
ax.set_ylabel('Number of Records')
# Slant the category labels and right-align them under their bars.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()