In [3]:
import pandas as pd
import numpy as np
import networkx as nx
from cdt.causality.graph import PC, Notears

# Load datasets
low_scrap = pd.read_csv('/teamspace/studios/this_studio/dataset/low_scrap.csv')
high_scrap = pd.read_csv('/teamspace/studios/this_studio/dataset/high_scrap.csv')

# Automatically infer the structure based on columns
num_features = low_scrap_df.shape[1]
station_order = [i // 20 for i in range(num_features)]  # 5 stations, 20 features per station

# Generate constraints to ensure each feature can influence only itself or subsequent stations
def create_constraints(station_order):
    constraints = []
    for i in range(num_features):
        for j in range(i + 1, num_features):
            if station_order[i] <= station_order[j]:  # Only influence the same or subsequent stations
                constraints.append((i, j))
    return constraints

# Get the constraints based on the station order
constraints = create_constraints(station_order)

# Use a causal discovery model like Notears, respecting the constraints
causal_model = Notears()

# Fit the model to low and high scrap datasets
low_scrap_graph = causal_model.predict(low_scrap_df, constraints=constraints)
high_scrap_graph = causal_model.predict(high_scrap_df, constraints=constraints)

# Extract adjacency matrices for comparison
low_scrap_adj_matrix = nx.adjacency_matrix(low_scrap_graph).todense()
high_scrap_adj_matrix = nx.adjacency_matrix(high_scrap_graph).todense()

# Save the adjacency matrix for high scrap
np.savetxt("high_scrap_adj_matrix.txt", high_scrap_adj_matrix, fmt='%d')

# Rank influences on target variable 'Station5_mp_85'
target_index = high_scrap_df.columns.get_loc("Station5_mp_85")
influences = high_scrap_adj_matrix[:, target_index].flatten()

# Sort influences by strength on the target variable
influences_sorted_indices = np.argsort(-influences)
ranked_influences = [(high_scrap_df.columns[idx], influences[idx]) for idx in influences_sorted_indices if influences[idx] > 0]

# Display top influences
print("Top influences on Station5_mp_85:", ranked_influences)


ImportError: cannot import name 'Notears' from 'cdt.causality.graph' (/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/cdt/causality/graph/__init__.py)

In [4]:
import pandas as pd
import numpy as np
import networkx as nx
from cdt.causality.graph import Notears
import matplotlib.pyplot as plt

# Load datasets
low_scrap = pd.read_csv('/teamspace/studios/this_studio/dataset/low_scrap.csv')
high_scrap = pd.read_csv('/teamspace/studios/this_studio/dataset/high_scrap.csv')


# Inferring station structure
num_features = low_scrap_df.shape[1]
station_order = [i // 20 for i in range(num_features)]  # 5 stations, 20 measurements per station

# Generate constraints to respect sequential station influence
def create_constraints(station_order):
    constraints = []
    for i in range(num_features):
        for j in range(i + 1, num_features):
            if station_order[i] <= station_order[j]:  # Constrain to self and subsequent stations
                constraints.append((i, j))
    return constraints

constraints = create_constraints(station_order)

# Initialize causal model with constraints
causal_model = Notears()
high_scrap_graph = causal_model.predict(high_scrap_df, constraints=constraints)

# Convert causal graph to adjacency matrix and save
high_scrap_adj_matrix = nx.adjacency_matrix(high_scrap_graph).todense()
np.savetxt("high_scrap_adj_matrix.txt", high_scrap_adj_matrix, fmt='%d')

# Calculate differences in means and variances
mean_diffs = high_scrap_df.mean() - low_scrap_df.mean()
var_diffs = high_scrap_df.var() - low_scrap_df.var()

# Analyze influence on Station5_mp_85
target_index = high_scrap_df.columns.get_loc("Station5_mp_85")
influences = high_scrap_adj_matrix[:, target_index].flatten()

# Rank and combine structural and distributional shifts
ranked_influences = [(high_scrap_df.columns[idx], influences[idx], mean_diffs[idx], var_diffs[idx]) 
                     for idx in np.argsort(-influences) if influences[idx] > 0]

# Display top influences
print("Top factors influencing Station5_mp_85:")
for feature, influence, mean_diff, var_diff in ranked_influences[:10]:
    print(f"{feature} | Influence: {influence}, Mean Diff: {mean_diff}, Var Diff: {var_diff}")

# Visualize the causal graph
plt.figure(figsize=(12, 8))
nx.draw(high_scrap_graph, with_labels=True, font_weight='bold')
plt.title("Causal Graph for High Scrap Data")
plt.show()


ImportError: cannot import name 'Notears' from 'cdt.causality.graph' (/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/cdt/causality/graph/__init__.py)