In [3]:
import pandas as pd
import requests
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [12]:
def fetch_data(url, *, timeout=30):
    """Download *url* with an HTTP GET and return its decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        Whatever ``response.json()`` decodes the body to.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: The server did not answer within *timeout* seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()


def create_dataframe(data):
    """Build a DataFrame from the value stored under the ``'data'`` key.

    Args:
        data: Mapping that contains a ``'data'`` entry accepted by the
            ``pd.DataFrame`` constructor.

    Returns:
        A new ``pd.DataFrame`` built from ``data['data']``.
    """
    records = data['data']
    return pd.DataFrame(data=records)


def encode_diagnosis(df):
    """Replace the ``'diagnosis'`` labels with integer codes, in place.

    ``'M'`` becomes 1 and ``'B'`` becomes 0. Any other value has no entry in
    the mapping, so ``Series.map`` turns it into NaN.

    Args:
        df: DataFrame with a ``'diagnosis'`` column. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    label_to_code = {'M': 1, 'B': 0}
    encoded = df['diagnosis'].map(label_to_code)
    df['diagnosis'] = encoded
    return df


def convert_to_numeric(df):
    """Return a numeric copy of *df* with an integer ``'diagnosis'`` column.

    Every column is passed through ``pd.to_numeric`` with
    ``errors='coerce'``, so values that cannot be parsed become NaN. The
    ``'diagnosis'`` column is then cast to ``int``, which raises if it
    contains NaN.

    Args:
        df: DataFrame with a ``'diagnosis'`` column. It is not modified.

    Returns:
        A new DataFrame with the converted columns.
    """
    numeric = df.apply(pd.to_numeric, errors='coerce')
    diagnosis_as_int = numeric['diagnosis'].astype(int)
    return numeric.assign(diagnosis=diagnosis_as_int)


def merge_data(df_cells, df_diagnosis):
    """Join the two frames on their shared ``'id'`` column.

    This is ``pd.merge`` with its default inner join, so only ids present
    in both inputs are kept.

    Args:
        df_cells: Left-hand frame; must contain an ``'id'`` column.
        df_diagnosis: Right-hand frame; must contain an ``'id'`` column.

    Returns:
        A new DataFrame holding the joined rows.
    """
    return pd.merge(
        left=df_cells,
        right=df_diagnosis,
        how='inner',
        on='id',
    )


def clean_data(df):
    """Drop the ``'id'`` column, duplicate rows and rows with NaN, in place.

    All three steps use ``inplace=True``, so the DataFrame passed in is
    itself modified. The index is not reset, so surviving rows keep their
    original labels.

    Args:
        df: DataFrame containing an ``'id'`` column.

    Returns:
        The same DataFrame object that was passed in.
    """
    # The identifier is removed first so that rows differing only by id
    # count as duplicates in the next step.
    df.drop('id', axis=1, inplace=True)
    df.drop_duplicates(keep='first', inplace=True)
    df.dropna(axis=0, how='any', inplace=True)
    return df


def format_data(df):
    """Encode the diagnosis labels, then convert every column to numbers.

    Args:
        df: DataFrame handed to ``encode_diagnosis`` and then to
            ``convert_to_numeric``.

    Returns:
        The DataFrame returned by ``convert_to_numeric``.
    """
    # Encoding has to come first: numeric coercion would otherwise turn
    # the string labels into NaN.
    return convert_to_numeric(encode_diagnosis(df))


def save_breast_cancer_data_in_csv(df, path='breast_cancer.csv'):
    """Write *df* to a CSV file, leaving out the index column.

    Args:
        df: DataFrame to write.
        path: Destination file. Defaults to ``'breast_cancer.csv'``
            (relative to the current working directory), the location
            previously hard-coded in this function.
    """
    df.to_csv(path, index=False)


def extract_data_in_csv(url_cells, url_diagnosis):
    """Fetch two JSON payloads, combine them and save the result as CSV.

    The steps run in this order: fetch both URLs, build a DataFrame from
    each payload, merge the frames, clean the merged frame, format it,
    and save it. Each step is delegated to a helper defined in this file.

    NOTE(review): cleaning (which drops NaN rows) runs before formatting
    (which coerces unparseable values to NaN), so such values are not
    removed before the file is written.

    Args:
        url_cells: URL fetched for the first (cells) payload.
        url_diagnosis: URL fetched for the second (diagnosis) payload.

    Returns:
        None.
    """
    cells_payload = fetch_data(url_cells)
    diagnosis_payload = fetch_data(url_diagnosis)

    cells_frame = create_dataframe(cells_payload)
    diagnosis_frame = create_dataframe(diagnosis_payload)

    prepared = format_data(clean_data(merge_data(cells_frame, diagnosis_frame)))
    save_breast_cancer_data_in_csv(prepared)


# Both JSON documents are requested from an HTTP server on localhost:8000.
url_cells = 'http://localhost:8000/breast_cancer_cells.json'
url_diagnosis = 'http://localhost:8000/breast_cancer_cells_diagnosis.json'

# Run the full pipeline: download, merge, clean, format, write the CSV.
extract_data_in_csv(url_cells=url_cells, url_diagnosis=url_diagnosis)