In [2]:
# Instalar dependências se necessário
# %pip install pymatgen matminer pandas numpy matplotlib seaborn scikit-learn

import os, pandas as pd, numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pymatgen.core import Composition

from matminer.featurizers.composition import ElementProperty, ValenceOrbital, BandCenter
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# =========================================================
# 1) Define the metals used in the proof of concept (POC)
# =========================================================
metals = "Rh Ir Ni Pd Pt Cu Ag Au Pb".split()
# One row per metal: the "composition" column holds the pymatgen Composition
# built from each element symbol.
df = pd.DataFrame({"composition": list(map(Composition, metals))})

# =========================================================
# 2) Generate features with Matminer
# =========================================================
# fe_magpie: ElementProperty with the "magpie" preset (Mendeleev number,
#            melting temperature, covalent radius, electronegativity, ...)
# fe_val:    valence-orbital fractions
# fe_db:     band center, used by this notebook as a d-band-center proxy
fe_magpie, fe_val, fe_db = ElementProperty.from_preset("magpie"), ValenceOrbital(), BandCenter()

# Each featurizer runs on its own copy of df so the three result frames are
# independent; ignore_errors=True keeps going if a composition fails.
df_mag, df_val, df_db = (
    featurizer.featurize_dataframe(df.copy(), "composition", ignore_errors=True)
    for featurizer in (fe_magpie, fe_val, fe_db)
)

# Merge everything: start from the Magpie frame and left-join the other two on
# the row index, dropping their duplicate "composition" column first.
df_feat = df_mag
for extra_features in (df_val, df_db):
    df_feat = df_feat.join(extra_features.drop(columns=["composition"]), how="left")

# =========================================================
# 3) Select the relevant subset of features
# =========================================================
# Maps every featurizer column we want to keep to the short name used in the
# rest of the notebook. The insertion order is the column order of df_sel.
rename_map = {
    "composition": "metal",
    "MagpieData mean Number": "Z",  # atomic number
    "MagpieData mean MendeleevNumber": "Mendeleev",
    "MagpieData mean AtomicWeight": "AtomicWeight",
    "MagpieData mean MeltingT": "Tmelt_K",
    "MagpieData mean CovalentRadius": "rcov_A",
    "MagpieData mean Electronegativity": "Electronegativity",
    "MagpieData mean ElectronAffinity": "ElectronAffinity",
    "frac s valence electrons": "val_s_frac",
    "frac p valence electrons": "val_p_frac",
    "frac d valence electrons": "val_d_frac",
    "frac f valence electrons": "val_f_frac",
    "band center": "d_band_center",
}

# Best-effort selection: keep only the requested columns that are present in
# df_feat, but report the ones that are missing instead of dropping them
# silently, so a missing feature is noticed here rather than as a KeyError in
# a later cell.
# NOTE(review): "MagpieData mean ElectronAffinity" may not be produced by the
# "magpie" preset -- confirm against the matminer feature list.
missing_cols = [c for c in rename_map if c not in df_feat.columns]
if missing_cols:
    print(f"Aviso: colunas ausentes em df_feat (ignoradas): {missing_cols}")
subset_cols = [c for c in rename_map if c in df_feat.columns]

# Selecting with a list and renaming both return new frames, so df_feat is
# left untouched and this cell can be re-run safely.
df_sel = df_feat[subset_cols].rename(columns=rename_map)

# =========================================================
# 4) Save to CSV
# =========================================================
poc_csv_path = "metal_features_poc.csv"
# index=False: the row index is not written to the file.
df_sel.to_csv(poc_csv_path, index=False)
print("Arquivo salvo: " + poc_csv_path)
display(df_sel)

# =========================================================
# 5) Correlation and PCA analyses
# =========================================================
# Every column except the "metal" label is treated as a numeric feature.
num_cols = [name for name in df_sel.columns if name != "metal"]
X = df_sel.loc[:, num_cols].to_numpy(dtype=float)

# Correlation heatmap between the numeric features
corr_matrix = df_sel[num_cols].corr()
fig_corr, ax_corr = plt.subplots(figsize=(10, 7))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", center=0, ax=ax_corr)
ax_corr.set_title("Correlação entre features")
plt.show()

# PCA on the standardized feature matrix (features are scaled first so that
# columns with large numeric ranges do not dominate the components).
X_std = StandardScaler().fit_transform(X)
pca = PCA().fit(X_std)

# Cumulative explained variance. The k-th entry is the variance explained by
# the first k components, so it is plotted against k = 1..n; letting
# plt.plot() pick the x values would start the axis at 0 and shift the curve
# by one relative to the "number of components" label.
cum_explained = np.cumsum(pca.explained_variance_ratio_)
n_components_axis = np.arange(1, len(cum_explained) + 1)

plt.figure()
plt.plot(n_components_axis, cum_explained, marker="o")
plt.xticks(n_components_axis)
plt.xlabel("Número de componentes principais")
plt.ylabel("Variância explicada acumulada")
plt.title("PCA — variância explicada")
plt.grid(True)
plt.show()

# Loadings: weight of each original feature (rows) in each principal
# component (columns).
# pca.components_ has one row per fitted component, and that count can be
# smaller than the number of features (PCA() keeps at most
# min(n_samples, n_features) components). The column labels are therefore
# built from the fitted components, not from len(num_cols); otherwise the
# DataFrame constructor fails with a shape mismatch when there are fewer
# samples than features.
loadings = pd.DataFrame(
    pca.components_.T,
    index=num_cols,
    columns=[f"PC{i + 1}" for i in range(pca.components_.shape[0])],
)
print("Contribuição das variáveis nas PCs:")
# head() shows the loadings of the first five features only.
display(loadings.head())


ModuleNotFoundError: No module named 'pandas'

In [3]:
import pandas as pd

# Keep only the four numeric features (the 'metal' label column is left out).
final_cols = ["Mendeleev", "Tmelt_K", "rcov_A", "d_band_center"]
# Select from df_sel, which is built by the feature-selection cell earlier in
# the notebook. The previous version read df_final, which is only created in
# a later cell, so this cell failed when the notebook was run top to bottom on
# a fresh kernel. The selected columns are the same either way.
df_features4 = df_sel[final_cols].copy()

# Check the result
display(df_features4)

# Save to CSV (without the row index)
df_features4.to_csv("metal_features_ultimas4.csv", index=False)
print("✅ Dataset salvo como metal_features_ultimas4.csv")


Unnamed: 0,Mendeleev,Tmelt_K,rcov_A,d_band_center
0,59.0,2237.0,142.0,4.2983
1,60.0,2739.0,141.0,5.343076
2,61.0,1728.0,124.0,4.400394
3,62.0,1828.05,139.0,4.447036
4,63.0,2041.4,136.0,5.572658
5,64.0,1357.77,132.0,4.477094
6,65.0,1234.93,145.0,4.439262
7,66.0,1337.33,136.0,5.767517
8,81.0,600.61,146.0,3.890444


✅ Dataset salvo como metal_features_ultimas4.csv


In [2]:
# Keep only the chosen columns: the metal label plus four features.
final_cols = ["metal", "Mendeleev", "Tmelt_K", "rcov_A", "d_band_center"]
df_final = df_sel.loc[:, final_cols].copy()

# Check the result
display(df_final)

# Save to CSV (without the row index)
reduced_csv_path = "metal_features_reduzido.csv"
df_final.to_csv(reduced_csv_path, index=False)
print("Dataset reduzido salvo como " + reduced_csv_path)


Unnamed: 0,metal,Mendeleev,Tmelt_K,rcov_A,d_band_center
0,(Rh),59.0,2237.0,142.0,4.2983
1,(Ir),60.0,2739.0,141.0,5.343076
2,(Ni),61.0,1728.0,124.0,4.400394
3,(Pd),62.0,1828.05,139.0,4.447036
4,(Pt),63.0,2041.4,136.0,5.572658
5,(Cu),64.0,1357.77,132.0,4.477094
6,(Ag),65.0,1234.93,145.0,4.439262
7,(Au),66.0,1337.33,136.0,5.767517
8,(Pb),81.0,600.61,146.0,3.890444


Dataset reduzido salvo como metal_features_reduzido.csv
