In [1]:
# Show the kernel's current working directory; relative paths used in this
# notebook resolve against it. The standard library is used instead of a bare
# `pwd`, which depends on IPython automagic and stops working as soon as a
# variable or module named `pwd` exists in the namespace.
import os

os.getcwd()

'/home/nanohub/q9/MP-PSI_Project'

## 📓 Sample Notebook Overview
### 🔧 Purpose
To merge metadata from three sources:
- mp_metadata.csv → from Materials Project API
- psi_metadata.csv → manually downloaded from NASA PSI
- gspace_metadata.json → exported from G-Space simulation

## 🧠 Notebook Sections
### 1️⃣ Setup & Import

In [5]:
import pandas as pd
import json
from pathlib import Path

# Locations of the three raw metadata inputs, one sub-folder per source.
# NOTE(review): the first cell reports the project root as the working
# directory, and the unzip cell addresses `data_raw` *inside* that root, so
# "../data_raw" may point one level too high -- confirm where this notebook is
# meant to be run from.
_raw_root = Path("..") / "data_raw"

mp_path = _raw_root / "mp" / "mp_metadata.csv"
psi_path = _raw_root / "psi" / "psi_metadata.csv"
gspace_path = _raw_root / "gspace" / "gspace_metadata.json"

In [None]:
 unzip /home/nanohub/q9/MP-PSI_Project/data_raw/psi/PSI-166_metadata_PSI-166-ISA.zip

### 2️⃣ Load Metadata

In [None]:
# Load Materials Project metadata (CSV).
mp_df = pd.read_csv(mp_path)

# Load PSI metadata (CSV).
psi_df = pd.read_csv(psi_path)

# Load G-Space metadata (JSON). The file is opened as UTF-8 explicitly:
# without `encoding`, `open()` falls back to a locale-dependent default and
# can mis-decode non-ASCII characters on some systems.
with open(gspace_path, encoding="utf-8") as f:
    gspace_data = json.load(f)

# Flatten nested JSON objects into dotted column names (e.g. "input.material").
gspace_df = pd.json_normalize(gspace_data)

### 3️⃣ Preview & Normalize Columns

In [None]:
# Preview the column names of each source before harmonising them.
print("MP Columns:", mp_df.columns.tolist())
print("PSI Columns:", psi_df.columns.tolist())
print("G-Space Columns:", gspace_df.columns.tolist())

# The column names indicate that PSI temperatures are in degrees Celsius
# (`temp_C`) while Materials Project uses Kelvin (`temperature_K`). Convert PSI
# to Kelvin *before* both are renamed to the shared `temperature` column;
# otherwise the merged column mixes units and the later de-duplication on
# (material, temperature) compares Celsius values against Kelvin values.
# The guard on the column name keeps the cell idempotent: once `temp_C` has
# been renamed, a re-run does not apply the offset a second time.
# NOTE(review): the unit of G-Space `input.temp` is not visible here -- confirm
# that it is Kelvin as well.
CELSIUS_TO_KELVIN_OFFSET = 273.15
if "temp_C" in psi_df.columns:
    psi_df = psi_df.assign(
        temp_C=pd.to_numeric(psi_df["temp_C"]) + CELSIUS_TO_KELVIN_OFFSET
    )

# Rename to a common schema. `rename` ignores keys that are absent, and
# rebinding the result (instead of `inplace=True`) keeps each step explicit.
mp_df = mp_df.rename(columns={"composition": "material", "temperature_K": "temperature"})
psi_df = psi_df.rename(columns={"alloy": "material", "temp_C": "temperature"})
gspace_df = gspace_df.rename(columns={"input.material": "material", "input.temp": "temperature"})

### 4️⃣ Merge Metadata

In [None]:
# Stack the three sources into one table, keep the first row seen for each
# (material, temperature) pair, and order the result by those same keys.
_merge_keys = ["material", "temperature"]

merged_df = (
    pd.concat([mp_df, psi_df, gspace_df], ignore_index=True)
    .drop_duplicates(subset=_merge_keys)
    .sort_values(by=_merge_keys)
)

### 5️⃣ Export Merged Metadata

In [None]:
# Save the merged table to the processed-data folder.
output_path = Path("../data_processed/merged_metadata.csv")

# `to_csv` does not create missing directories, so make sure the target
# folder exists first (a no-op when it is already there).
output_path.parent.mkdir(parents=True, exist_ok=True)

# The row index carries no information after the sort, so it is not written.
merged_df.to_csv(output_path, index=False)

print(f"Merged metadata saved to: {output_path}")

## 📌 Optional Enhancements
 - Add a source column to track origin (MP, PSI, G-Space)
 - Validate temperature units (convert °C to K if needed)
 - Use schema.json to enforce metadata standards