In [2]:
import pandas as pd
import numpy as np
from scipy.io import mmread

# === Step 1: Load matrices ===
# Each Matrix Market file is read and converted to CSR format.
content = mmread("texas_content.mtx").tocsr()
inbound = mmread("texas_inbound.mtx").tocsr()
outbound = mmread("texas_outbound.mtx").tocsr()
cites = mmread("texas_cites.mtx").tocsr()

print("Shapes:")
print("content:", content.shape)
print("inbound:", inbound.shape)
print("outbound:", outbound.shape)
print("cites:", cites.shape)

# === Step 2: Load labels ===
labels = pd.read_csv("texas_act.txt", header=None, sep=r"\s+")[0].values  # document label indices
label_names = pd.read_csv("labels.txt", header=None)[0].values           # actual label names

# The mapping below treats labels as 1-based indices into label_names.
# Reject anything outside 1..len(label_names): a 0 (or other non-positive
# value) would turn into a negative index and silently select a name
# counted from the end of label_names instead of failing.
if labels.size and (labels.min() < 1 or labels.max() > len(label_names)):
    raise ValueError(
        f"label indices must be in 1..{len(label_names)}, "
        f"got range {labels.min()}..{labels.max()}"
    )

# Map numeric labels to text
labels_text = [label_names[i - 1] for i in labels]


# === Step 3: Convert to DataFrames ===
def _to_labeled_frame(matrix, row_labels):
    """Return a dense DataFrame of *matrix* with a trailing "label" column.

    Args:
        matrix: Sparse matrix exposing ``toarray()``.
        row_labels: One label per matrix row, in row order.

    Returns:
        A DataFrame whose columns are the matrix columns followed by
        ``"label"``.
    """
    frame = pd.DataFrame(matrix.toarray())
    frame["label"] = row_labels
    return frame


df_content = _to_labeled_frame(content, labels_text)
df_inbound = _to_labeled_frame(inbound, labels_text)
df_outbound = _to_labeled_frame(outbound, labels_text)
df_cites = _to_labeled_frame(cites, labels_text)

# === Step 4: Save to CSV ===
# index=False keeps the row index out of the written files.
for frame, csv_path in (
    (df_content, "texas_content.csv"),
    (df_inbound, "texas_inbound.csv"),
    (df_outbound, "texas_outbound.csv"),
    (df_cites, "texas_cites.csv"),
):
    frame.to_csv(csv_path, index=False)

print("✅ Conversion complete! CSV files saved:")
print("texas_content.csv, texas_inbound.csv, texas_outbound.csv, texas_cites.csv")


Shapes:
content: (187, 1703)
inbound: (187, 187)
outbound: (187, 187)
cites: (187, 187)
✅ Conversion complete! CSV files saved:
texas_content.csv, texas_inbound.csv, texas_outbound.csv, texas_cites.csv
