# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.preprocessing import StandardScaler

# Read the composition table (point file_path at your own CSV file)
file_path = r'/content/dataset.csv'
data = pd.read_csv(file_path)

# Element symbols that may appear as composition columns in the dataset
expected_columns = [
    'Al', 'Co', 'Cr', 'Fe',
    'Ni', 'Cu', 'Mn', 'Ti',
    'V', 'Nb', 'Mo', 'Zr',
    'Hf', 'Ta', 'W', 'Si',
]

# Keep only the expected elements that the dataset actually provides,
# in the order they are listed in expected_columns
dataset_columns = data.columns
available_columns = [name for name in expected_columns if name in dataset_columns]

# Nothing to analyse if none of the expected elements is present
if not available_columns:
    raise ValueError("No expected columns found in the dataset. Check the dataset structure.")

# Sub-table holding just the element composition columns
element_data = data[available_columns]

# Use complete rows only: StandardScaler keeps NaN values in its output, and
# the least-squares fits performed by variance_inflation_factor do not cope
# with missing values (they yield NaN results or fail outright).
vif_input = element_data.dropna()
dropped_rows = len(element_data) - len(vif_input)
if dropped_rows:
    print("Dropped {} row(s) with missing element values before computing VIF.".format(dropped_rows))
if vif_input.empty:
    raise ValueError("No complete rows left after dropping missing values; cannot compute VIF.")

# Standardize the data. The centering is the part that matters:
# variance_inflation_factor does not add an intercept to its auxiliary
# regressions, so columns with a non-zero mean would distort the reported
# values. Rescaling to unit variance by itself does not change VIF.
scaler = StandardScaler()
element_data_scaled = scaler.fit_transform(vif_input)

# Compute VIF for each feature (column i is regressed on all other columns)
vif_data = pd.DataFrame({
    "Feature": available_columns,
    "VIF": [
        variance_inflation_factor(element_data_scaled, i)
        for i in range(element_data_scaled.shape[1])
    ],
})

# Print VIF results
print(vif_data)

# Elements whose bars are drawn in red; all remaining bars are sky blue
red_features = ['Cr', 'Fe', 'Ni', 'V', 'Nb', 'Ti', 'Zr']
bar_colors = np.where(
    vif_data["Feature"].isin(red_features), 'red', 'skyblue'
).tolist()

# One bar per feature on a single 12x6 inch axes, with a thick black outline
fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.bar(
    vif_data["Feature"],
    vif_data["VIF"],
    color=bar_colors,
    edgecolor='black',
    linewidth=2,
)

# Dashed horizontal reference lines at the two VIF thresholds (40 and 10)
for level, line_color, caption in (
    (40, 'r', 'Moderate Multicollinearity (VIF > 40)'),
    (10, 'orange', 'Low Multicollinearity (VIF ≤ 10)'),
):
    ax.axhline(y=level, color=line_color, linestyle='--', label=caption)

# Axis labels, title, slanted tick labels and legend
ax.set_xlabel('Features')
ax.set_ylabel('VIF')
ax.set_title('Variance Inflation Factor (VIF) Analysis')
plt.xticks(rotation=45, ha='right')
ax.legend()
fig.tight_layout()

# Write the figure to disk first, then display it
fig.savefig('/content/vif_analysis_with_red_features.png')
plt.show()
