In [None]:
# import necessary libraries for data manipulation, model evaluation, and plotting
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, precision_recall_fscore_support
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from tensorboard.backend.event_processing import event_accumulator
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, RocCurveDisplay

import os
import pandas as pd
import datetime

import torch
from torch.optim import AdamW  # variant of Adam with weight decay
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2ForSequenceClassification, GPT2Tokenizer, TrainingArguments, Trainer

import json
import numpy as np
from transformers import EarlyStoppingCallback

from sklearn.utils.class_weight import compute_class_weight

import datetime
import sklearn
from transformers import get_linear_schedule_with_warmup
from transformers import EarlyStoppingCallback


In [None]:
# Cell 1
# data preprocessing

# One model is trained per SDoH column. This global names the column currently
# being modelled; compute_metrics reads it to choose a label vocabulary.
current_sdoh = "behavior_alcohol"

# SDoH columns that share the same set of values share one mapping from
# integer class id to display name.

# community present/absent and education
sdoh_community_education = dict(enumerate(('False', 'True')))

# economics and environment
sdoh_economics_environment = dict(enumerate(('None', 'True', 'False')))

# alcohol, drug, tobacco
sdbh_alcohol_drug_tobacco = dict(
    enumerate(('None', 'Present', 'Past', 'Never', 'Unsure'))
)

def balance_data(df):
    """Oversample every minority class of ``df`` up to the majority-class size.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'y'`` column holding the class label of each row.

    Returns
    -------
    pandas.DataFrame
        The majority-class rows followed by, for every other class in
        ``value_counts()`` order, ``len(majority)`` rows drawn with
        replacement (``random_state=42``). Rows are not shuffled. When ``df``
        has no labelled rows an empty frame with the same columns is returned.
    """
    counts = df['y'].value_counts()
    if counts.empty:
        # Nothing to balance; idxmax() would raise on an empty Series.
        return df.iloc[:0].copy()

    majority_label = counts.idxmax()
    majority = df[df['y'] == majority_label]
    target_size = len(majority)

    # Collect the pieces and concatenate once instead of growing the frame
    # inside the loop.
    parts = [majority]
    for label in counts.index:
        if label == majority_label:
            continue
        parts.append(
            resample(
                df[df['y'] == label],
                replace=True,            # sample with replacement
                n_samples=target_size,   # match the majority class size
                random_state=42,
            )
        )

    return pd.concat(parts)

# NEW function to determine which SDOH is described
def set_sdoh(sdoh_group):
    """Record ``sdoh_group`` as the SDoH column currently being modelled.

    Rebinds the module-level ``current_sdoh`` variable; returns nothing.
    """
    global current_sdoh
    current_sdoh = sdoh_group

# Build a stratified train/validation split for every SDoH column of the
# preprocessed notes and write each split under test_train_split/<column>/.
dataset = pd.read_csv("/content/PREPROCESSED-NOTES.csv")

# Note text plus one label list per SDoH column.
text_data = dataset["text"].to_list()
sdoh_data = {
    column: dataset[column].to_list()
    for column in (
        "sdoh_community_present",
        "sdoh_community_absent",
        "sdoh_education",
        "sdoh_economics",
        "sdoh_environment",
        "behavior_alcohol",
        "behavior_tobacco",
        "behavior_drug",
    )
}

for category, data in sdoh_data.items():
    # Every category gets its own output folder (created on demand), so no
    # directory needs to be prepared ahead of the loop.
    base_path = f"test_train_split/{category}"
    os.makedirs(base_path, exist_ok=True)

    # 80/20 split, stratified on this category's labels.
    X_train, X_val, y_train, y_val = train_test_split(
        text_data, data, random_state=0, train_size=0.8, stratify=data
    )  # maybe try a different test size (0.7/0.3)

    # Oversample minority classes in the training portion only; the
    # validation portion keeps its original class distribution.
    train_df = pd.DataFrame({'text': X_train, 'y': y_train})
    balanced_train_df = balance_data(train_df)

    X_train_balanced = balanced_train_df['text'].tolist()
    y_train_balanced = balanced_train_df['y'].tolist()

    # Features and labels are stored as separate single-column CSV files.
    pd.DataFrame({"text": X_train_balanced}).to_csv(f"{base_path}/X_train.csv", index=False)
    pd.DataFrame({"text": X_val}).to_csv(f"{base_path}/X_val.csv", index=False)
    pd.DataFrame({category: y_train_balanced}).to_csv(f"{base_path}/y_train.csv", index=False)
    pd.DataFrame({category: y_val}).to_csv(f"{base_path}/y_val.csv", index=False)


In [None]:
# Cell 2
# model training and evaluation setup

# install required libraries
!pip install transformers[torch] accelerate -U plotting

label_columns = [
    'sdoh_community_present', 'sdoh_community_absent', 'sdoh_education',
    'sdoh_economics', 'sdoh_environment', 'behavior_alcohol',
    'behavior_tobacco', 'behavior_drug'
]

# Initialize tokenizer and model configuration
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2ForSequenceClassification.from_pretrained("gpt2", num_labels=len(label_columns))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Compute class weights for each category
class_weights_dict = {}
for category in label_columns:
    labels = dataset[category].values
    weights = compute_class_weight(class_weight='balanced', classes=np.unique(labels), y=labels)
    class_weights_dict[category] = torch.tensor(weights, dtype=torch.float32)

# Compute class weights for each label
class_weights = []
for category in label_columns:
    labels = dataset[category].values
    weights = compute_class_weight(class_weight='balanced', classes=np.unique(labels), y=labels)
    class_weights.append(torch.tensor(weights, dtype=torch.float).to(device))

def save_metrics_to_csv(json_filepath, csv_filename):
    """Export the ``log_history`` records of a JSON file to a CSV file.

    Parameters
    ----------
    json_filepath : str
        Path of a JSON document with a top-level ``'log_history'`` list of
        dictionaries.
    csv_filename : str
        Destination CSV path; written without the index column.
    """
    with open(json_filepath) as state_file:
        state = json.load(state_file)

    # One row per logged record; columns are the union of the record keys.
    history_frame = pd.DataFrame(state['log_history'])
    history_frame.to_csv(csv_filename, index=False)

def plot_metric_from_tensor(log_dir, output_dir, steps_per_epoch):
    """Plot training and validation loss read from TensorBoard event files.

    The ``"train/loss"`` and ``"eval/loss"`` scalars found under ``log_dir``
    are drawn on one axis. The figure is saved as ``metrics_plot.png`` inside
    ``output_dir`` and then shown.

    Parameters
    ----------
    log_dir : str
        Directory containing the TensorBoard event files.
    output_dir : str
        Folder for the saved figure; created if it does not exist.
    steps_per_epoch : int
        Number of logged steps that make up one epoch. Used to convert the
        recorded step of every point into an epoch value for the x axis.
        When falsy (``0`` or ``None``) the raw step is plotted instead and
        the axis is labelled accordingly.
    """
    event_acc = event_accumulator.EventAccumulator(log_dir)
    event_acc.Reload()

    def _curve(tag):
        # Returns (x values, loss values) for one scalar tag.
        events = event_acc.Scalars(tag)
        if steps_per_epoch:
            xs = [event.step / steps_per_epoch for event in events]
        else:
            xs = [event.step for event in events]
        return xs, [event.value for event in events]

    val_x, val_loss = _curve("eval/loss")
    train_x, train_loss = _curve("train/loss")

    plt.figure(figsize=(10, 6))

    plt.plot(val_x, val_loss, label="Validation Loss")
    plt.plot(train_x, train_loss, label="Train Loss")

    plt.legend()

    # Label the axis with the unit that is actually plotted.
    plt.xlabel("Epoch" if steps_per_epoch else "Step")
    plt.ylabel("Loss")
    plt.title("Overlap")

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save before show(): some backends clear the figure once it is shown.
    plt.savefig(os.path.join(output_dir, 'metrics_plot.png'))
    plt.show()


# Disabled first draft of compute_metrics. It is wrapped in a bare string
# literal, so the text below is evaluated as a string and discarded; none of
# it runs. The draft computes the metrics but contains no return statement.
'''def compute_metrics(pred): #evaluation metrics function
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision = precision_score(labels, preds, average='weighted')
    recall = recall_score(labels, preds, average='weighted')
    f1 = f1_score(labels, preds, average='weighted')
    accuracy = accuracy_score(labels, preds)
    auc = roc_auc_score(labels, preds, average='weighted', multi_class='ovr')

     # Confusion Matrix
    # cm = ConfusionMatrixDisplay.from_predictions(labels, preds)
    # cm.plot()
    # plt.show()
'''

# Define a function to update the report with the correct label names
def update_report(report, sdoh_dict):
    """Re-key report entries from numeric class ids to their label names.

    Parameters
    ----------
    report : dict
        Mapping whose per-class entries are keyed by the class id as a
        string (``'0'``, ``'1'``, ...). Other keys are ignored.
    sdoh_dict : dict
        Mapping from class id to display name.

    Returns
    -------
    dict
        ``{display name: report entry}`` for every id of ``sdoh_dict`` whose
        string form is a key of ``report``, in ``sdoh_dict`` order.
    """
    return {
        label_name: report[str(class_id)]  # report keys are strings
        for class_id, label_name in sdoh_dict.items()
        if str(class_id) in report
    }

# Disabled second draft of compute_metrics, also wrapped in a bare string
# literal so that it never executes. This variant builds the classification
# report with numeric keys and renames them through update_report.
'''def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    accuracy = accuracy_score(labels, preds)
    auc = roc_auc_score(labels, preds, average='weighted', multi_class='ovr')

    # Generate the initial classification report
    report = classification_report(labels, preds, output_dict=True)

    # Determine which mapping to use based on current_sdoh
    if current_sdoh.startswith("behavior"):
        current_sdoh_dict = sdbh_alcohol_drug_tobacco
    elif current_sdoh in ["sdoh_economics", "sdoh_environment"]:
        current_sdoh_dict = sdoh_economics_environment
    else:
        current_sdoh_dict = sdoh_community_education

    # Update the report with correct label names
    updated_report = update_report(report, current_sdoh_dict)

    print(f'Classification Report for {current_sdoh}:')
    for label, metrics in updated_report.items():
        print(f'{label}:')
        print(f' Precision: {metrics["precision"]}')
        print(f' Recall: {metrics["recall"]}')
        print(f' F1-score: {metrics["f1-score"]}')
        print(f' Support: {metrics["support"]}')
        print('')

    return {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'auc': auc
    }
'''
def _weighted_ovr_auc(true_labels, logits):
    """Weighted one-vs-rest ROC AUC from class scores, or NaN when undefined."""
    classes = np.unique(true_labels)
    if classes.size < 2:
        # ROC AUC is undefined when only one class occurs in the labels.
        return float('nan')
    try:
        # Keep only the score columns of classes that occur in the labels and
        # softmax over them: multi-class roc_auc_score needs one column per
        # class with rows summing to 1.
        class_logits = np.asarray(logits, dtype=np.float64)[:, classes.astype(int)]
        class_logits = class_logits - class_logits.max(axis=1, keepdims=True)
        probabilities = np.exp(class_logits)
        probabilities /= probabilities.sum(axis=1, keepdims=True)

        if classes.size == 2:
            # Binary case: score of the class with the larger label.
            return roc_auc_score(true_labels, probabilities[:, 1])
        return roc_auc_score(
            true_labels, probabilities,
            average='weighted', multi_class='ovr', labels=classes,
        )
    except (ValueError, IndexError):
        # e.g. a label id with no matching score column.
        return float('nan')


def compute_metrics(pred):
    """Compute evaluation metrics and print a per-class report.

    Parameters
    ----------
    pred : object
        Must expose ``label_ids`` (true class ids) and ``predictions``
        (per-class scores, one row per sample), as read below.

    Returns
    -------
    dict
        ``accuracy``, weighted ``f1`` / ``precision`` / ``recall`` and
        ``auc``. ``auc`` is computed from softmaxed scores rather than from
        hard predictions and is NaN when it cannot be computed.

    The display names of the classes are chosen from the module-level
    ``current_sdoh`` value.
    """
    true_labels = np.asarray(pred.label_ids)
    scores = pred.predictions[0] if isinstance(pred.predictions, tuple) else pred.predictions
    scores = np.asarray(scores)
    preds = scores.argmax(-1)

    accuracy = accuracy_score(true_labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, preds, average='weighted', zero_division=0
    )
    auc = _weighted_ovr_auc(true_labels, scores)

    # Pick the label vocabulary that belongs to the current SDoH column.
    if current_sdoh.startswith("behavior"):
        current_sbdh_dict = sdbh_alcohol_drug_tobacco
    elif current_sdoh == "sdoh_economics" or current_sdoh == "sdoh_environment":
        current_sbdh_dict = sdoh_economics_environment
    else:  # community present/absent and education
        current_sbdh_dict = sdoh_community_education

    # target_names needs ONE name per class (not one per sample). Classes are
    # taken from both labels and predictions so that a predicted id outside
    # the vocabulary still gets a row, named by its number.
    class_ids = sorted(set(true_labels.tolist()) | set(preds.tolist()))
    class_names = [current_sbdh_dict.get(class_id, str(class_id)) for class_id in class_ids]

    # Generate and print the classification report
    print(f'Classification Report for {current_sdoh}:')
    report = classification_report(
        true_labels, preds,
        labels=class_ids, target_names=class_names,
        output_dict=True, zero_division=0,
    )
    for label_name in class_names:
        metrics = report[label_name]
        print(f'{label_name}:')
        print(f' Precision: {metrics["precision"]}')
        print(f' Recall: {metrics["recall"]}')
        print(f' F1-score: {metrics["f1-score"]}')
        print(f' Support: {metrics["support"]}')
        print('')

    return {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'auc': auc
    }

def get_latest_checkpoint(folder_path):
    """Return the path of the highest-numbered ``checkpoint-<N>`` directory.

    Parameters
    ----------
    folder_path : str
        Folder to scan. Only its direct sub-directories named
        ``checkpoint-<digits>`` are considered; other directories and plain
        files are ignored.

    Returns
    -------
    str or None
        ``os.path.join(folder_path, <directory name>)`` for the checkpoint
        with the largest number, or ``None`` (after printing a message) when
        no such directory exists.
    """
    prefix = "checkpoint-"

    # (step number, directory name) for every well-formed checkpoint folder.
    candidates = []
    for name in os.listdir(folder_path):
        suffix = name[len(prefix):]
        if not (name.startswith(prefix) and suffix.isdecimal()):
            continue  # e.g. a "runs" folder written next to the checkpoints
        if os.path.isdir(os.path.join(folder_path, name)):
            candidates.append((int(suffix), name))

    if not candidates:
        print("No checkpoint directories found.")
        return None

    # Join the real directory name so a zero-padded number keeps its spelling.
    _, latest_name = max(candidates)
    return os.path.join(folder_path, latest_name)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Cell 3
# model training and saving

# install required libraries
!pip install transformers[torch] accelerate -U plotting

# Define label_columns here
# either turn all into binary(try this first as 0 or 1) or train different models
label_columns = [
    'sdoh_community_present', 'sdoh_community_absent', 'sdoh_education',
    'sdoh_economics', 'sdoh_environment', 'behavior_alcohol',
    'behavior_tobacco', 'behavior_drug'
]

# Initialize tokenizer, this is standard approach with GPT-2
num_labels = len(label_columns)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", num_labels = num_labels)
tokenizer.pad_token = tokenizer.eos_token
configuration = GPT2ForSequenceClassification.config_class.from_pretrained("gpt2", num_labels= num_labels)
configuration.pad_token_id = tokenizer.pad_token_id
model = GPT2ForSequenceClassification(configuration)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Set up no decay for certain model parameters to avoid regularization on them
no_decay = ['bias', 'LayerNorm.weight']  # weight decay with a minor penalty during
optimizer_grouped_parameters = [  # no selects params added
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5)

# load dataset, preprocess, and prepare DataLoader for training and validation
dataset = pd.read_csv("/content/PREPROCESSED-NOTES.csv")
text_data = dataset["text"].to_list()
sdoh_data = dataset["sdoh_community_present"].to_list() # need to load 8 instead of just community_present that's why the loss doesn't make sense

# can either do binary and multilevel classification or one model per label
# one sdoh per model, don't need to convert to binary because every model will be different

timestamp_fortrain = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Split the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(text_data, sdoh_data, random_state=0, train_size=0.8,
                                                  stratify=sdoh_data) #make it test = 0.3
max_seq_length = 100  # actually 50 but increase to accomadate outliers

# Calculate the number of trainable parameters in the model
num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
model_size_MB = num_trainable_params * 4 / (1024 ** 2)
effective_batch = 8 / (50*4*model_size_MB) #gpu/seqlength * 4 * model size

# define training arguments and start training with the Trainer
train_encodings = tokenizer(X_train, truncation=True, padding='max_length', max_length=max_seq_length, return_tensors='pt')
val_encodings = tokenizer(X_val, truncation=True, padding='max_length', max_length=max_seq_length, return_tensors='pt')


# custom Dataset class for loading training and validation data
class DataLoader(Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels.

    NOTE: despite its name this is a ``Dataset``. It shadows the
    ``torch.utils.data.DataLoader`` imported at the top of the notebook; the
    name is kept because the code below instantiates it under this name.

    Parameters
    ----------
    encodings : mapping
        Maps each model input name to a tensor indexed by sample on its
        first dimension.
    labels : sequence or tensor
        One integer class id per sample; stored as a ``torch.long`` tensor.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = torch.as_tensor(labels, dtype=torch.long)

    def __getitem__(self, idx):
        # Errors are allowed to propagate: returning None for a bad index
        # would only postpone the failure to batch collation, where the
        # offending index is no longer known.
        item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx].clone().detach()
        return item

    def __len__(self):
        # Number of samples equals the number of labels.
        return len(self.labels)

# Wrap the tokenized splits and their labels in the dataset class defined above.
train_dataset = DataLoader(
    train_encodings,  # output of the tokenizer
    y_train  # labels, as a list or tensor
)

val_dataset = DataLoader(
    val_encodings,  # output of the tokenizer
    y_val  # labels, as a list or tensor
)

# Separate, timestamped folders for TensorBoard logs and for checkpoints.
tensor_logs = f'./logs/tensor_logs/{timestamp_fortrain}'
os.makedirs(tensor_logs, exist_ok=True)
epoch_logs = f'./logs/epoch_logs/{timestamp_fortrain}'
os.makedirs(epoch_logs, exist_ok=True)

# training args - need to adjust
training_args = TrainingArguments(
    output_dir=epoch_logs,  # checkpoints are written here
    logging_strategy='epoch',  # log once per epoch
    num_train_epochs=4,
    #per_device_train_batch_size=64,  # cpu constraint,  64 approp
    per_device_train_batch_size=16,  # reduced batch size
    per_device_eval_batch_size=64,
    save_strategy='epoch',
    warmup_steps=500,
    weight_decay=1e-5,
    logging_dir=tensor_logs,  # TensorBoard event files
    #eval_steps=100,
    evaluation_strategy="epoch",
    #accumulate gradients over 4 steps
    #gradient_accumulation_steps = 4
    load_best_model_at_end=True,  # reload the best checkpoint when training ends
    metric_for_best_model="eval_loss",  # "best" means lowest validation loss
    greater_is_better=False,  # a lower loss is better

)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    # Stop when eval_loss has not improved for 3 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

# train the model
trainer.train()

# evaluate the model
evaluation_results = trainer.evaluate()

# Readable results.
# Ceiling division: a final, smaller batch still counts as one step.
# NOTE(review): assumes a single device and no gradient accumulation.
steps_per_epoch = -(-len(train_dataset) // training_args.per_device_train_batch_size)
latest_checkpoint = get_latest_checkpoint(epoch_logs)
if latest_checkpoint is not None:
    # Export the log history stored in the newest checkpoint's trainer_state.json.
    json_path = os.path.join(latest_checkpoint, 'trainer_state.json')
    save_metrics_to_csv(json_path, 'eval_metric.csv')
else:
    print(f"Skipping eval_metric.csv: no checkpoint found in {epoch_logs}")
plot_metric_from_tensor(tensor_logs, 'graphs', steps_per_epoch)

save_directory = "saved_models/gpt2"

os.makedirs(save_directory, exist_ok=True)
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

print("Evaluation Results:", evaluation_results)



Epoch,Training Loss,Validation Loss


Classification Report for behavior_alcohol:


ValueError: Number of classes, 2, does not match size of target_names, 1405. Try specifying the labels parameter

Save a checkpoint and evaluate at the end of every epoch. During testing, load only the best checkpoint, chosen by comparing the training and validation results.

In [None]:
# Cell 4
# Evaluation on Test Data

import torch
from torch.utils.data import DataLoader, Dataset
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np

# Display name of every integer class id, grouped by label vocabulary.
# Update these mappings to match your specific labels and categories.
label_mappings = {
    'sdoh_community_education': dict(enumerate(('False', 'True'))),
    'sdoh_economics_environment': dict(enumerate(('None', 'True', 'False'))),
    'sdbh_alcohol_drug_tobacco': dict(
        enumerate(('None', 'Present', 'Past', 'Never', 'Unsure'))
    ),
}

def get_category(label):
    """Return the label-vocabulary group that ``label`` belongs to.

    Community and education columns map to ``'sdoh_community_education'``,
    economics and environment columns to ``'sdoh_economics_environment'``.
    Every other value, which includes the behavior_* columns, maps to
    ``'sdbh_alcohol_drug_tobacco'``.
    """
    explicit_groups = (
        ('sdoh_community_education',
         ('sdoh_community_present', 'sdoh_community_absent', 'sdoh_education')),
        ('sdoh_economics_environment',
         ('sdoh_economics', 'sdoh_environment')),
    )
    for category, members in explicit_groups:
        if label in members:
            return category
    # Fallback: behavior_alcohol, behavior_tobacco, behavior_drug and anything else.
    return 'sdbh_alcohol_drug_tobacco'

def evaluate_on_test_data(model_path, test_data_path, tokenizer_path, label_columns, max_seq_length=512,
                          batch_size=64, results_path='/content/model_evaluation_results.csv'):
    """Score a saved model on an annotated CSV and report per-label metrics.

    Parameters
    ----------
    model_path : str
        Directory of the saved ``GPT2ForSequenceClassification`` model.
    test_data_path : str
        CSV file with a ``"TEXT"`` column and one column per entry of
        ``label_columns`` holding the true class ids.
    tokenizer_path : str
        Directory of the saved tokenizer.
    label_columns : list of str
        Label columns to evaluate; column ``i`` of the model output is used
        as the prediction for ``label_columns[i]``.
    max_seq_length : int, default 512
        Texts are truncated to this many tokens.
    batch_size : int, default 64
        Number of texts sent through the model at once; lower it when the
        GPU runs out of memory.
    results_path : str
        Destination of the summary CSV (one row per label).

    Prints a classification report per label and writes the summary CSV.
    Returns nothing.
    """
    tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_path)
    if tokenizer.pad_token is None:
        # GPT-2 has no padding token by default; reuse end-of-sequence.
        tokenizer.pad_token = tokenizer.eos_token
    model = GPT2ForSequenceClassification.from_pretrained(model_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Keep the DataFrame under its own name: the true labels are read from
    # it after prediction.
    test_frame = pd.read_csv(test_data_path)
    texts = test_frame["TEXT"].tolist()
    # Pad to the longest text instead of always padding to max_seq_length,
    # which keeps the batches (and GPU memory use) as small as the data allows.
    test_encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_seq_length, return_tensors='pt')

    predictions = {label: [] for label in label_columns}
    results = []
    threshold = 0.5

    model.eval()
    with torch.no_grad():
        # Slice the encoded tensors directly; no Dataset/DataLoader wrapper
        # (and no tensor re-wrapping) is needed for sequential inference.
        for start in range(0, len(texts), batch_size):
            inputs = {key: val[start:start + batch_size].to(device) for key, val in test_encodings.items()}
            logits = model(**inputs).logits
            sigmoid_logits = torch.sigmoid(logits).cpu().numpy()
            # NOTE(review): each output column is thresholded independently,
            # so predictions are only ever 0 or 1, while some label
            # vocabularies have more than two classes — confirm this matches
            # how the model was trained.
            binary_predictions = (sigmoid_logits > threshold).astype(int)
            for i, label in enumerate(label_columns):
                predictions[label].extend(binary_predictions[:, i].tolist())

    for label in label_columns:
        category = get_category(label)
        mapping = label_mappings[category]
        true_labels = test_frame[label].tolist()
        true_mapped_labels = [mapping[val] for val in true_labels]
        pred_mapped_labels = [mapping[val] for val in predictions[label]]

        accuracy = accuracy_score(true_mapped_labels, pred_mapped_labels)
        precision = precision_score(true_mapped_labels, pred_mapped_labels, average='weighted', zero_division=0)
        recall = recall_score(true_mapped_labels, pred_mapped_labels, average='weighted', zero_division=0)
        f1 = f1_score(true_mapped_labels, pred_mapped_labels, average='weighted', zero_division=0)
        report = classification_report(true_mapped_labels, pred_mapped_labels, zero_division=0)

        results.append({'Label': label, 'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1})

        print(f"Metrics for {label} ({category}):")
        print(report)

    results_df = pd.DataFrame(results)
    results_df.to_csv(results_path, index=False)
    print(f"Evaluation results saved to {results_path}")

# Locations of the fine-tuned model, its tokenizer and the annotated test notes.
model_path = "saved_models/gpt2"
tokenizer_path = "saved_models/gpt2"
test_data_path = "/content/ANNOTATEDNOTES.csv"

# Label columns to evaluate, in model-output order.
label_columns = [
    'sdoh_community_present',
    'sdoh_community_absent',
    'sdoh_education',
    'sdoh_economics',
    'sdoh_environment',
    'behavior_alcohol',
    'behavior_tobacco',
    'behavior_drug',
]

evaluate_on_test_data(
    model_path,
    test_data_path,
    tokenizer_path,
    label_columns,
    max_seq_length=512,
)


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


OutOfMemoryError: CUDA out of memory. Tried to allocate 768.00 MiB. GPU 0 has a total capacty of 14.75 GiB of which 357.06 MiB is free. Process 48394 has 14.40 GiB memory in use. Of the allocated memory 12.79 GiB is allocated by PyTorch, and 1.48 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# AUROC FOR BINARY

from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Inputs expected from earlier cells: y_true holds the true binary labels and
# y_scores the scores predicted by the model.
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
roc_auc = auc(fpr, tpr)

lw = 2
fig, ax = plt.subplots()
# Model ROC curve, then the diagonal of a random classifier for reference.
ax.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend(loc="lower right")
plt.show()


In [None]:
# AUROC FOR MULTI-CLASS
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from itertools import cycle

# Inputs expected from earlier cells: y_true holds the true class ids and
# y_scores one score column per class (column i scores class i).

# The number of classes is taken from the score columns; it has to be known
# BEFORE the labels can be binarized.
n_classes = np.shape(y_scores)[1]

# Binarize the output labels for multi-class classification
y = label_binarize(y_true, classes=list(range(n_classes)))
# NOTE: with exactly two classes label_binarize returns a single column, so
# this cell is meant for three or more classes.
n_classes = y.shape[1]

# Compute ROC curve and ROC area for each class (one-vs-rest)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y[:, i], y_scores[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot all ROC curves; the three colors repeat when there are more classes.
plt.figure()
colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=2,
             label='ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc[i]))

# Diagonal of a random classifier for reference.
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Multi-class')
plt.legend(loc="lower right")
plt.show()