In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = [os.path.join(dirname, filename) for filename in filenames]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import json
import time
from itertools import product

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

In [None]:
# Location of MMSA_2022.csv inside the cdc-brfss-mmsa-2022 Kaggle input dataset.
file_location = "/kaggle/input/cdc-brfss-mmsa-2022/MMSA_2022.csv"
data = pd.read_csv(filepath_or_buffer=file_location)

# Print the column / dtype / memory summary of the loaded frame.
data.info()

In [None]:
def calc_cost(y_true,y_pred,cost_matrix,labels=None):
    """Return the total misclassification cost of a set of predictions.

    The confusion matrix of ``y_true`` vs ``y_pred`` is transposed, multiplied
    element-wise by ``cost_matrix`` and summed. After the transpose, cell
    ``[i, j]`` holds the number of samples predicted as class ``i`` whose true
    class is ``j``, so ``cost_matrix[i][j]`` is the price of that outcome.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels, forwarded to ``confusion_matrix``.
    cost_matrix : array-like
        Square cost table with one row/column per class. ``np.matrix`` inputs
        are accepted and treated as plain 2-D arrays.
    labels : array-like, optional
        Forwarded to ``confusion_matrix`` to fix the class order and matrix
        size, e.g. when a class may be absent from both label vectors.

    Returns
    -------
    The summed cost as a numpy scalar.
    """
    conf = confusion_matrix(y_true,y_pred,labels=labels).T
    # np.asarray + np.multiply force an element-wise product: with an
    # np.matrix operand the `*` operator would perform matrix multiplication.
    return np.sum(np.multiply(conf, np.asarray(cost_matrix)))

In [None]:
def _expand_param_grid(params):
    """Expand a parameter grid into a list of concrete parameter dicts.

    ``params`` is either one dict mapping parameter names to lists of candidate
    values, or a list of such dicts; every combination within each dict is
    generated. Returns ``None`` when ``params`` is neither a dict nor a list.
    """
    if isinstance(params, dict):
        grids = [params]
    elif isinstance(params, list):
        grids = params
    else:
        return None
    return [
        dict(zip(grid.keys(), values))
        for grid in grids
        for values in product(*grid.values())
    ]


def find_best_params(estimator,params,train_set,validation_set,cost_matrix=None,starting_point=0):
    """Grid-search ``estimator`` and return the lowest-cost parameter combination.

    Each candidate is applied with ``estimator.set_params``, fitted on the
    training set, and scored on the validation set with ``calc_cost``. The
    estimator passed in is modified and refitted in place.

    Parameters
    ----------
    estimator : object
        Must provide ``set_params``, ``fit`` and ``predict``.
    params : dict or list of dict
        Parameter grid(s): name -> list of candidate values.
    train_set, validation_set : sequence
        ``[features, labels]`` pairs.
    cost_matrix : array-like, optional
        Cost table passed to ``calc_cost``. Defaults to ``[[0, 1], [40, 0]]``.
    starting_point : int, optional
        Index of the first candidate to evaluate; earlier ones are skipped.

    Returns
    -------
    dict
        The candidate with the lowest validation cost (``{}`` if none was
        evaluated). An empty list is returned when ``params`` is neither a
        dict nor a list.

    Side effects
    ------------
    Prints one log line per candidate and writes the accumulated log to
    ``perflogs.txt`` every 10 candidates and once more after the last one.
    """
    train_x, train_y = train_set[0], train_set[1]
    val_x, val_y = validation_set[0], validation_set[1]

    full_params_set = _expand_param_grid(params)
    if full_params_set is None:
        return []

    if cost_matrix is None:
        cost_matrix = [[0, 1], [40, 0]]
    # Plain ndarray so the cost is an element-wise product, never a matrix product.
    cost_matrix = np.asarray(cost_matrix)

    min_cost = np.inf
    best_params = {}

    steps = len(full_params_set)
    counter = starting_point
    print(f"Testing {steps} models in total.")
    start = time.time()

    performance_log = []

    for candidate in full_params_set[starting_point:]:

        estimator.set_params(**candidate)
        estimator.fit(train_x,train_y)
        pred_y = estimator.predict(val_x)
        cost = calc_cost(val_y,pred_y,cost_matrix)

        if cost < min_cost:
            min_cost = cost
            best_params = candidate

        log = f"{counter}/{steps} | Cost: {cost} | Elapsed: {int((time.time()-start)*100)/100}s | {candidate}"
        performance_log.append(log)
        print("________________________________________________________________________________________")
        print(log)
        print("________________________________________________________________________________________")
        counter += 1
        if counter % 10 == 0:
            with open("perflogs.txt","w") as f:
                f.write("\n".join(performance_log))

    # Flush whatever was logged since the last multiple-of-10 checkpoint.
    if performance_log:
        with open("perflogs.txt","w") as f:
            f.write("\n".join(performance_log))

    return best_params

In [None]:
# Hyper-parameter grid for the linear-kernel SVM search.
linear_params = {"C":[0.1,1.0,10.0,100.0],"kernel":["linear"]}

# Fixed seed so the train/validation/test splits are identical on every run.
SEED = 42

# Label-encode each COLUMN independently (axis=0). With axis=1 the encoder is
# fitted per row, so a code would depend on the other values in that row.
LE = LabelEncoder()
data_encoded = data.apply(LE.fit_transform,axis=0)

target_column = "_MICHD"

y = data_encoded[target_column]
x = data_encoded.drop(columns=[target_column])

# 20% held out for test; 25% of the remaining 80% for validation,
# i.e. a 60/20/20 train/validation/test split.
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=SEED)
x_train,x_val,y_train,y_val = train_test_split(x_train,y_train,test_size=0.25,random_state=SEED)

In [None]:
# Misclassification costs handed to the search: entry [i][j] is the cost of
# predicting class i when the true class is j (calc_cost transposes the
# confusion matrix before weighting it).
COST_MATRIX = np.array([[0, 1], [40, 0]])

best_params = find_best_params(SVC(),linear_params,[x_train,y_train],[x_val,y_val],COST_MATRIX)

In [None]:
# Show the parameter combination returned by the search.
print(best_params)

In [None]:
# Refit on the training split with the selected hyper-parameters.
# `**` passes them as keyword arguments; `*` would pass the dict keys positionally.
classifier = SVC(**best_params)
classifier.fit(x_train,y_train)

# coef_ is a dense ndarray for dense input and a sparse matrix for sparse
# input; normalise both cases to a 2-D ndarray.
coef = classifier.coef_
if hasattr(coef, "toarray"):
    coef = coef.toarray()
coef = np.asarray(coef)

# Pair each coefficient with the feature it belongs to. The names come from
# x_train (the target column is not a feature), not from the raw frame.
# NOTE(review): only the first coefficient row is used, which assumes a binary
# target -- confirm _MICHD has exactly two encoded classes.
weights = zip(x_train.columns, coef[0])

# Sort ascending by weight; plain str/float values keep the dict JSON-serialisable.
weights_sorted = {str(k): float(v) for k, v in sorted(weights, key=lambda item: item[1])}
with open("feature_importances.json","w") as f:
    json.dump(weights_sorted,f)