In [None]:
%pip install tensorflow

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,InputLayer
import zipfile
import Generate_models
import importlib  
import Generate_models
importlib.reload(Generate_models)

In [None]:
# Unpack the zipped Adult dataset into a folder next to the archive,
# then read the raw table with every field kept as a string.
outdirname = 'data/adult'
zipfilename = f'{outdirname}.zip'
with zipfile.ZipFile(zipfilename, mode='r') as archive:
    archive.extractall(path=outdirname)

# Fields are separated by ", "; with invalid_raise=False malformed rows are
# skipped with a warning instead of aborting the load.
raw_data = np.genfromtxt(
    f'{outdirname}/adult.data',
    delimiter=', ',
    dtype=str,
    invalid_raise=False,
)


In [None]:
# Labels for the 15 fields of the raw table, in the order they appear in the file.
column_names = [
    'age',
    'workclass',
    'fnlwgt',
    'education',
    'educational-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'gender',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
    'income',
]

# Wrap the string array in a labelled DataFrame.
adult_data = pd.DataFrame(data=raw_data, columns=column_names)

In [None]:
# Cast the numeric columns from the strings read from disk to real numeric
# dtypes. One vectorised ``astype`` call replaces six row-by-row
# ``apply(lambda x: ...)`` passes; columns not listed here are left unchanged.
# Explicit 64-bit dtypes keep the result identical across platforms.
adult_data = adult_data.astype({
    "age": "int64",
    "fnlwgt": "float64",
    "educational-num": "int64",
    "capital-gain": "float64",
    "capital-loss": "float64",
    "hours-per-week": "float64",
})


In [None]:
# Name of the target column.
output = "income"

# The 14 predictor columns, in the order they are fed to the model.
input_features = [
    'age',
    'workclass',
    'fnlwgt',
    'education',
    'educational-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'gender',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
]


In [None]:
# Display the assembled DataFrame as the cell output for a visual check.
adult_data

In [None]:
# Feed-forward binary classifier over inputs of shape (1, 14).
model = Sequential([
    InputLayer(shape=(1, 14)),
    Dense(150, activation='relu'),
    # Hidden layers
    Dense(64, activation='relu'),
    Dense(16, activation='relu'),
    # Collapse the length-1 leading axis before the single-unit sigmoid output.
    Flatten(),
    Dense(1, activation='sigmoid'),
])

# Compile the model: binary cross-entropy loss, Adam optimiser, accuracy as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Configure the project's Generate_model helper for a non-sequential
# classification task with a 20% test split.
Gm = Generate_models.Generate_model(
    data=adult_data,
    input_features=input_features,
    output=output,
    problem="classification",
    # The eight string-valued predictors plus the target column 'income'.
    categorical_features=[
        'workclass',
        'education',
        'marital-status',
        'occupation',
        'relationship',
        'race',
        'gender',
        'native-country',
        'income',
    ],
    test_size=0.2,
    n_steps_in=1,
    n_steps_out=1,
    dType="notsequence",
)

In [None]:
# Run build_model for 20 epochs on the compiled network and unpack its seven results.
# NOTE(review): going by the names, these are the trained model, the train/test
# splits and the fitted encoders/scalers — confirm against Generate_models.
(
    model,
    xtrain,
    ytrain,
    xtest,
    ytest,
    encoder,
    norm,
) = Gm.build_model(model=model, epochs=20)

In [None]:
import os

# Make sure the output folder exists first: on a fresh checkout it is absent,
# and saving into a missing directory can fail.
os.makedirs("./classification_models", exist_ok=True)
model.save("./classification_models/adult.keras")

In [None]:
# Sanity check: push one randomly drawn row through the trained model and
# compare the rounded prediction with the row's recorded label.
sample = adult_data.sample(1)
print("Sample from adult_data:")
print(sample)
sample_input = sample[input_features]

# Preprocess the sample using the same pipeline as the training data.
# If Gm has a transform/preprocess method, use it (adjust the name if your
# method is called differently). Otherwise apply the encoders and scaler
# returned by build_model manually.
if hasattr(Gm, "transform"):
    processed_sample = Gm.transform(sample_input)
else:
    sample_encoded = sample_input.copy()

    # Encode categorical features.
    # NOTE(review): assumes encoder[i] is the encoder fitted for the i-th name
    # below, i.e. the order passed to Generate_model — confirm.
    categorical_features = ['workclass', 'education', 'marital-status', 'occupation',
                            'relationship', 'race', 'gender', 'native-country']
    for i, col in enumerate(categorical_features):
        sample_encoded[col] = encoder[i].transform(sample_encoded[col])

    # Scale all 14 input columns (encoded categoricals included) with norm[0].
    # The original duplicated this list under the misleading name
    # `numerical_features`; `input_features` holds the same columns in the same order.
    sample_encoded[input_features] = norm[0].transform(sample_encoded[input_features])

    # The model takes inputs of shape (batch, 1, n_features).
    processed_sample = sample_encoded.values.astype(np.float32).reshape(1, 1, -1)

# Predict. The original branched on hasattr(Gm, "transform") here, but both
# branches were identical, so a single call is equivalent.
pred = model.predict(processed_sample)

# Round the model output to a 0/1 class label.
pred = np.round(pred).astype(int)
print("Sample input:")
print(sample_input)
print("Predicted output:")
print(pred)
print("Real output (from adult_data):")
print(sample[output])


In [None]:
import Cf_explanation as cfe

In [None]:
def _model_output(instance):
    """Return the model's scalar output for one instance reshaped to (1, 1, 14)."""
    batch = np.reshape(instance, (1, 1, 14))
    return np.array(model.predict(batch, verbose=0)).flatten()[0]


# Two test rows used as query instances for the counterfactual search.
query_instance1 = xtest[0]
query_instance2 = xtest[80]

# Model outputs for both queries, shown as the cell result.
pr1 = _model_output(query_instance1)
pr11 = _model_output(query_instance2)
pr1, pr11

In [None]:
# Population handed to cf.Explane: the first 50 training rows followed by
# test rows 5 through 49.
population = np.concatenate([xtrain[:50], xtest[5:50]])

In [None]:
# Counterfactual search for query_instance1 with the NSGA2 method.
cf = cfe.Conterfactual(
    model=model,
    query_instance=query_instance1,
    problem="classification",  # can also be set to "regression"
    total_CFs=5,  # number of counterfactuals wanted
    number_of_features=14,
    length=1,
    # NOTE(review): presumably positions in input_features that may be changed
    # (workclass, fnlwgt, educational-num, occupation, relationship,
    # capital-gain, capital-loss, hours-per-week) — confirm against Cf_explanation.
    permitted_features=[1, 2, 4, 6, 7, 10, 11, 12],
    correlated_feature=[],  # assuming no correlation for now
    direction="greater",
)

# Generate counterfactual explanations (at most 100 iterations).
best_population = cf.Explane(population, method="NSGA2", max_itera=100)

# Print the resulting counterfactuals.
print("Counterfactual explanations generated:")
print(best_population)

In [None]:
# Number of entries in the first element of the Explane result.
len(best_population[0])

In [None]:
# Fitness updates: plot the second element of the Explane result, skipping its
# first four entries.
# NOTE(review): presumably the per-iteration fitness history — confirm.
fig, ax = plt.subplots()
ax.plot(best_population[1][4:])
ax.set_ylabel("fitness value")
ax.set_xlabel("iteration")

In [None]:
# Column labels passed to cf.Visualization: the string-valued subset first,
# then the full list of 14 input columns.
categorical_columns = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'gender',
    'native-country',
]
input_columns = [
    'age',
    'workclass',
    'fnlwgt',
    'education',
    'educational-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'gender',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
]

    


    


In [None]:
# Pass the NSGA2 counterfactuals to cf.Visualization together with the column
# labels, the fitted encoders and the first scaler.
p = cf.Visualization(
    best_population[0],
    input_columns=input_columns,
    categorical_columns=categorical_columns,
    encoder=encoder,
    norm=norm[0],
)

In [None]:
# Show the value returned by cf.Visualization as the cell output.
p

In [None]:
# Counterfactual search for query_instance1 with the GENO-TOPSIS method.
cf = cfe.Conterfactual(
    model=model,
    query_instance=query_instance1,
    problem="classification",  # can also be set to "regression"
    total_CFs=5,  # number of counterfactuals wanted
    number_of_features=14,
    length=1,
    # do not change [age, education, marital-status, race, gender, native-country]
    permitted_features=[],
    correlated_feature=[],  # assuming no correlation for now
    direction="greater",
)

# Generate counterfactual explanations (at most 100 iterations).
best_population = cf.Explane(population, method="GENO-TOPSIS", max_itera=100)

# Print the resulting counterfactuals.
print("Counterfactual explanations generated:")
print(best_population)

In [None]:
# Pass the GENO-TOPSIS counterfactuals to cf.Visualization together with the
# column labels, the fitted encoders and the first scaler.
p = cf.Visualization(
    best_population[0],
    input_columns=input_columns,
    categorical_columns=categorical_columns,
    encoder=encoder,
    norm=norm[0],
)

In [None]:
# Show the value returned by cf.Visualization as the cell output.
p

In [None]:
# Pass the generated counterfactuals to cf.Distribution.
# NOTE(review): presumably plots their distribution — confirm in Cf_explanation.
cf.Distribution(best_population[0])

In [None]:
pip install dice-ml

In [None]:
# Sklearn imports
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier

# DiCE imports
import dice_ml
from dice_ml.utils import helpers  # helper functions

In [None]:
# Load the Adult income dataset through DiCE's helper module.
dataset = helpers.load_adult_income_dataset()

In [None]:
# Preview the first rows of the DiCE dataset.
dataset.head()

In [None]:
# Description of the transformed features, as provided by DiCE's helper;
# displayed as the cell output.
adult_info = helpers.get_adult_data_info()
adult_info

In [None]:
# Stratified 80/20 split with a fixed seed. The full frame (target column
# included) is split so that train_dataset can be handed to dice_ml.Data as is.
target = dataset["income"]
train_dataset, test_dataset, y_train, y_test = train_test_split(
    dataset,
    target,
    test_size=0.2,
    random_state=0,
    stratify=target,
)

# Feature-only views used to fit and query the classifier.
x_train = train_dataset.drop(columns='income')
x_test = test_dataset.drop(columns='income')

In [None]:
# Step 1: dice_ml.Data — describe the training frame to DiCE, naming the
# continuous features and the outcome column.
d = dice_ml.Data(
    dataframe=train_dataset,
    continuous_features=['age', 'hours_per_week'],
    outcome_name='income',
)

In [None]:
numerical = ["age", "hours_per_week"]
categorical = x_train.columns.difference(numerical)

# One-hot encode the categorical columns; categories unseen at fit time are
# encoded as all zeros instead of raising.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# ColumnTransformer drops every column that is not listed (remainder='drop' is
# the default). The original listed only the categorical columns, so 'age' and
# 'hours_per_week' never reached the classifier even though DiCE is told they
# are continuous features it may vary. Pass them through unchanged; a random
# forest needs no feature scaling.
transformations = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical),
        ('cat', categorical_transformer, categorical)])

# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
# random_state pins the forest so a re-run reproduces the same model
# (the train/test split above is seeded the same way).
clf = Pipeline(steps=[('preprocessor', transformations),
                      ('classifier', RandomForestClassifier(random_state=0))])

# NOTE: this rebinds `model`, replacing the Keras network defined earlier in the
# notebook; earlier cells re-run after this one will see the sklearn pipeline.
model = clf.fit(x_train, y_train)

In [None]:
# Wrap the fitted pipeline for DiCE using the sklearn backend.
m = dice_ml.Model(
    model=model,
    backend="sklearn",
)

# Build the explainer; method="random" selects DiCE's random method for generating CFs.
exp = dice_ml.Dice(
    d,
    m,
    method="random",
)

In [None]:
# Ask DiCE for two counterfactuals that flip the predicted class of the first
# test row, then show only the feature values that changed.
first_test_row = x_test.iloc[0:1]
e1 = exp.generate_counterfactuals(
    first_test_row,
    total_CFs=2,
    desired_class="opposite",
)
e1.visualize_as_dataframe(show_only_changes=True)

In [None]:
# Number of input features recorded by the fitted estimator. At this point
# `model` is the sklearn pipeline fitted above, not the Keras network.
model.n_features_in_