<a href="https://colab.research.google.com/github/HonestlyMasquerading/MaterialPrediction_NanoGeneration/blob/main/Nanotech_MaterialScience.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Setup
We start by installing and importing the necessary libraries.

In [None]:
!pip install torch torchvision torchaudio
!pip install torchdata
!pip install dgl
!pip install mp_api pymatgen matgl matplotlib ase
!pip install fastai timm

import os
import warnings
from getpass import getpass

import dgl
import matgl
import matplotlib.pyplot as plt
import mp_api.client as mp
import torch
import torch.nn as nn
from ase.io import write
from ase.visualize import view
from dgl.data import DGLDataset
from dgl.nn import GraphConv
from IPython.display import Image, display
from matgl.ext.pymatgen import Structure2Graph
from pymatgen.core import Structure
from pymatgen.io.ase import AseAtomsAdaptor
from torch.utils.data import DataLoader, random_split
warnings.filterwarnings("ignore")




OSError: libcusparse.so.11: cannot open shared object file: No such file or directory

## 2. API Access

In [None]:
# Never store the API key in the notebook: read it from the MP_API_KEY
# environment variable, and fall back to a hidden prompt when it is not set.
# NOTE: a key that was ever committed to this notebook should be regenerated
# on the Materials Project dashboard.
api_key = os.environ.get("MP_API_KEY") or getpass("Enter your Materials Project API key: ")
mpr = mp.MPRester(api_key)

ValueError: unconverted data remains: .post

## 3. Data Query
We query the API for materials with available elasticity data (Young's modulus) to train our model.

In [None]:
# Define fields to retrieve
fields = ["material_id", "structure", "elasticity.youngs_modulus", "is_metal", "is_insulator"]

# Query materials with elasticity data (up to 1000 materials)
# NOTE(review): the `has_fields` argument and the `elasticity.youngs_modulus`
# field path are kept exactly as written; confirm both against the installed
# mp_api version.
docs = mpr.summary.search(has_fields=["elasticity"], fields=fields, limit=1000)


def _youngs_modulus(doc):
    """Return the document's Young's modulus, or None when any part of it is missing."""
    elasticity = getattr(doc, "elasticity", None)
    return getattr(elasticity, "youngs_modulus", None)


# Keep only documents that carry a usable target value. Filtering once, before
# building the parallel lists, keeps ids / structures / labels index-aligned and
# prevents a None label from reaching torch.tensor() during preprocessing.
labelled_docs = [(doc, _youngs_modulus(doc)) for doc in docs]
labelled_docs = [(doc, modulus) for doc, modulus in labelled_docs if modulus is not None]

# Extract relevant data
material_ids = [doc.material_id for doc, _ in labelled_docs]
structures = [doc.structure for doc, _ in labelled_docs]
youngs_moduli = [modulus for _, modulus in labelled_docs]
material_types = [
    "metal" if doc.is_metal else "insulator" if doc.is_insulator else "unknown"
    for doc, _ in labelled_docs
]

skipped = len(docs) - len(labelled_docs)
if skipped:
    print(f"Skipped {skipped} materials without a Young's modulus value.")
print(f"Retrieved {len(material_ids)} materials with elasticity data.")

## 4. Data Preprocessing
We convert crystal structures into graph representations for the GNN using matgl. Each graph represents atoms as nodes and bonds as edges, with features like atomic numbers.

In [None]:
# Elements the converter is configured with, listed one periodic-table row per
# line. Only the elements named here are included.
element_types = (
    "H "
    "Li Be B C N O F "
    "Na Mg Al Si P S Cl "
    "K Ca Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br "
    "Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I "
    "Cs Ba La Ce Nd Sm Gd Dy Er Yb Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi"
).split()

# Structure-to-graph converter using a cutoff of 5.0
s2g = Structure2Graph(element_types=element_types, cutoff=5.0)

# Build the graph list and the matching label list side by side. A structure
# whose conversion raises is reported and left out of both lists, so the two
# stay index-aligned.
graphs = []
labels = []
for struct, ym in zip(structures, youngs_moduli):
    try:
        # get_graph returns several values; only the first (the graph) is kept
        graph = s2g.get_graph(struct)[0]
    except Exception as e:
        print(f"Error processing structure: {e}")
    else:
        graphs.append(graph)
        labels.append(ym)

# Dataset wrapper pairing each graph with its regression target
class MaterialDataset(DGLDataset):
    """In-memory dataset of (graph, label) pairs.

    Args:
        graphs: sequence of graphs, stored as given.
        labels: sequence of numeric targets, converted to a float32 tensor.
    """

    def __init__(self, graphs, labels):
        super(MaterialDataset, self).__init__(name="material_dataset")
        self.graphs = graphs
        # Convert once up front so indexing returns tensor elements
        label_tensor = torch.tensor(labels, dtype=torch.float32)
        self.labels = label_tensor

    def __len__(self):
        """Number of graphs held by the dataset."""
        n_graphs = len(self.graphs)
        return n_graphs

    def __getitem__(self, idx):
        """Return the ``(graph, label)`` pair stored at position ``idx``."""
        graph = self.graphs[idx]
        label = self.labels[idx]
        return graph, label

# Build the dataset from the converted graphs and report its size
dataset = MaterialDataset(graphs=graphs, labels=labels)
print(f"Dataset size after preprocessing: {len(dataset)}")

## 5. Model Definition
We define a simple GNN model using dgl to predict Young's modulus from graph representations.

In [None]:
class GNNModel(nn.Module):
    """Two-layer graph convolutional regressor.

    Node features pass through two ``GraphConv`` layers (ReLU in between), are
    averaged over the nodes of each graph, and are mapped to one scalar per
    graph by a linear layer.

    Args:
        in_feats: size of each input node feature vector.
        hidden_feats: output size of the first convolution.
        out_feats: output size of the second convolution (input to the linear head).
    """

    def __init__(self, in_feats, hidden_feats, out_feats):
        super().__init__()
        self.conv1 = GraphConv(in_feats, hidden_feats)
        self.conv2 = GraphConv(hidden_feats, out_feats)
        self.fc = nn.Linear(out_feats, 1)
        self.relu = nn.ReLU()

    def forward(self, g, features):
        """Return one prediction per graph in ``g``.

        Args:
            g: a (possibly batched) DGL graph.
            features: node feature tensor passed to the first convolution.

        Returns:
            Output of the final linear layer applied to the per-graph mean of
            the node embeddings.
        """
        h = self.conv1(g, features)
        h = self.relu(h)
        h = self.conv2(g, h)
        # local_scope discards the temporary 'h' field on exit, so the caller's
        # graph is not left carrying stale embeddings from this forward pass.
        with g.local_scope():
            g.ndata['h'] = h
            hg = dgl.mean_nodes(g, 'h')  # Aggregate node features
        return self.fc(hg)

# Determine input feature size from the first graph
if not graphs:
    raise ValueError("No graphs available: preprocessing produced an empty dataset.")
# NOTE(review): assumes the converter stores a 2-D 'atomic_numbers' node field;
# confirm the key and its shape against the installed matgl version.
in_feats = graphs[0].ndata['atomic_numbers'].shape[1]
model = GNNModel(in_feats=in_feats, hidden_feats=64, out_feats=64)

## 6. Training
We split the data into training, validation, and test sets, then train the model.

In [None]:
# Step 6: Training (GPU-enabled)


def collate_graphs(samples):
    """Combine ``(graph, label)`` samples into one batched graph and a label tensor.

    torch's default collate function cannot merge DGL graphs, so the graphs are
    merged with ``dgl.batch`` and the labels are stacked into a 1-D tensor.
    """
    sample_graphs, sample_labels = zip(*samples)
    return dgl.batch(list(sample_graphs)), torch.stack(sample_labels)


def run_epoch(loader, train=False):
    """Run one pass over ``loader`` and return the mean per-batch loss.

    Args:
        loader: data loader yielding ``(batched_graph, labels)`` pairs.
        train: when True the model is put in training mode and the optimizer
            is stepped on every batch; otherwise the pass is evaluation-only
            and gradients are disabled.

    Returns:
        Sum of the batch losses divided by the number of batches
        (0.0 for an empty loader).
    """
    model.train(train)
    total_loss = 0.0
    with torch.set_grad_enabled(train):
        for batch_graphs, batch_labels in loader:
            batch_graphs = batch_graphs.to(device)
            batch_labels = batch_labels.to(device)
            # NOTE(review): 'atomic_numbers' must match the node field the
            # graph converter actually writes -- confirm.
            features = batch_graphs.ndata['atomic_numbers'].float()
            outputs = model(batch_graphs, features)
            # squeeze(-1) drops only the trailing size-1 dimension, so a batch
            # holding a single graph keeps the same shape as its labels.
            loss = criterion(outputs.squeeze(-1), batch_labels)
            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item()
    # max(..., 1) avoids a ZeroDivisionError when a split ends up empty
    return total_loss / max(len(loader), 1)


# Split dataset: 80% train, 10% val, 10% test
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
# A seeded generator makes the split reproducible across kernel restarts
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_graphs)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_graphs)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_graphs)

# Move model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    train_loss = run_epoch(train_loader, train=True)

    # Validation
    val_loss = run_epoch(val_loader)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# Evaluate on test set
test_loss = run_epoch(test_loader)
print(f"Test Loss: {test_loss:.4f}")

## 7. Fabrication Method Suggestion
We define a simple function to suggest fabrication methods based on material type.

In [None]:
def suggest_fabrication(material_type):
    """Map a material type label to a suggested fabrication method.

    Args:
        material_type: label to look up; "metal" and "insulator" are the
            recognised values.

    Returns:
        The fabrication method for a recognised label, otherwise the
        "Unknown (requires further analysis)" placeholder.
    """
    known_routes = (
        ("metal", "Alloy Synthesis"),
        ("insulator", "Sintering"),
    )
    for label, method in known_routes:
        if material_type == label:
            return method
    return "Unknown (requires further analysis)"

## 8. Atomic Structure Visualization
We create a function to generate and save images of atomic structures.

In [None]:
def generate_image(structure, filename):
    """Render ``structure`` to a PNG image written at ``filename``.

    Args:
        structure: structure object accepted by ``AseAtomsAdaptor.get_atoms``.
        filename: destination path of the PNG file.
    """
    adaptor = AseAtomsAdaptor()
    ase_atoms = adaptor.get_atoms(structure)
    # A fixed rotation is applied for a better view of the structure
    write(filename, ase_atoms, format='png', rotation='10x,20y,30z')

## 9. Prediction Interface
This function allows you to input a material ID and get predictions, fabrication suggestions, and a structure image.

In [None]:
def predict_material(material_id):
    """Predict Young's modulus for one material and display a short report.

    Fetches the material, converts its structure to a graph, runs the trained
    model, then prints the prediction (with the stored value when available),
    the material type and the suggested fabrication method, and displays a
    rendered image of the structure saved as ``<material_id>.png``.

    Any failure is caught and reported as a printed message; nothing is raised
    and nothing is returned.

    Args:
        material_id: Materials Project identifier, e.g. "mp-1234".
    """
    try:
        results = mpr.summary.search(material_ids=material_id, fields=fields)
        # An unknown ID yields no results; say so explicitly instead of
        # surfacing a bare "list index out of range".
        if not results:
            raise ValueError("no matching material found")
        doc = results[0]
        structure = doc.structure
        actual_youngs_modulus = doc.elasticity.youngs_modulus if doc.elasticity else None
        material_type = "metal" if doc.is_metal else "insulator" if doc.is_insulator else "unknown"

        graph = s2g.get_graph(structure)[0]

        model.eval()
        with torch.no_grad():
            graph = graph.to(device)
            # NOTE(review): 'atomic_numbers' must match the node field the
            # graph converter actually writes -- confirm.
            features = graph.ndata['atomic_numbers'].float()
            predicted_youngs_modulus = model(graph, features).item()

        fabrication_method = suggest_fabrication(material_type)
        image_filename = f"{material_id}.png"
        generate_image(structure, image_filename)

        print(f"\nMaterial ID: {material_id}")
        print(f"Predicted Young's Modulus: {predicted_youngs_modulus:.2f} GPa")
        # Compare against None so a stored value of 0.0 is still reported
        if actual_youngs_modulus is not None:
            print(f"Actual Young's Modulus: {actual_youngs_modulus:.2f} GPa")
        print(f"Material Type: {material_type}")
        print(f"Suggested Fabrication Method: {fabrication_method}")
        display(Image(filename=image_filename))
    except Exception as e:
        print(f"Error processing material ID {material_id}: {e}")

## 10. Interactive Prediction
Run this cell to input your own material ID and get predictions. Use a material ID from the Materials Project (e.g., "mp-1234").

In [None]:
# Interactive input
# Strip surrounding whitespace (easy to paste by accident) and skip the lookup
# entirely when nothing was entered, instead of querying with an empty ID.
user_material_id = input("Enter a Materials Project material ID (e.g., mp-1234): ").strip()
if user_material_id:
    predict_material(user_material_id)
else:
    print("No material ID entered; skipping prediction.")