In [1]:
"""script"""
import json
import os
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression


def get_input(local=False):
    if local:
        print("Reading local file 9c820e0e5b3a4264aa5058f24a82386d.csv")

        return "9c820e0e5b3a4264aa5058f24a82386d.csv"

    dids = os.getenv("DIDS", None)

    if not dids:
        print("No DIDs found in environment. Aborting.")
        return

    dids = json.loads(dids)

    for did in dids:
        filename = f"data/inputs/{did}/0"  # 0 for metadata service
        print(f"Reading asset file {filename}.")

        return filename


def run_linear_regression(local=False):
    filename = get_input(local)
    if not filename:
        print("Could not retrieve filename.")
        return

    iris_data = pd.read_csv(filename, header=0, nrows=12)

    X = iris_data.iloc[:, :1]  # we only take the first two features.

    classes = iris_data.iloc[:, -2]  # assume classes are the final column
    le = preprocessing.LabelEncoder()
    le.fit(classes)
    Y = le.transform(classes)

    # Create an instance of Logistic Regression Classifier and fit the data.
    logreg = LogisticRegression(C=1e5)
    logreg.fit(X, Y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    #x_min, x_max = X.iloc[:, 0].min() - 0.5, X.iloc[:, 0].max() + 0.5
    #y_min, y_max = X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5
    h = 0.02  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)

    if local:
        print("Plotting results")
        plot(xx, yy, Z, X, Y)

    filename = "logistic_regression.pickle" if local else "/data/outputs/result"
    with open(filename, "wb") as pickle_file:
        print(f"Pickling results in {filename}")
        pickle.dump(Z, pickle_file)


def plot(xx, yy, Z, X, Y):
    plt.figure(1, figsize=(4, 3))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

    # Plot also the training points
    plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=Y, edgecolors="k", cmap=plt.cm.Paired)
    plt.xlabel("Sepal length")
    plt.ylabel("Sepal width")

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())

    plt.show()


if __name__ == "__main__":
    local = len(sys.argv) == 2 and sys.argv[1] == "local"
    run_linear_regression(True)
    #run_linear_regression(local)

Reading local file 9c820e0e5b3a4264aa5058f24a82386d.csv


IndexError: single positional indexer is out-of-bounds

Unnamed: 0,CODI EOI,NOM ESTACIO,DATA,MAGNITUD,CONTAMINANT,UNITATS,TIPUS ESTACIO,AREA URBANA,CODI INE,MUNICIPI,...,19h,20h,21h,22h,23h,24h,ALTITUD,LATITUD,LONGITUD,GEOREFERENCIA
0,43148003,Tarragona (Bonavista),25/01/2023,10,PM10,µg/m3,industrial,suburban,43148,Tarragona,...,28.0,29.0,39.0,33.0,24.0,20.0,39,41.115910,1.191999,POINT (1.1919986 41.11591)
1,8137001,Montseny (La Castanya),25/01/2023,12,NOX,µg/m3,background,rural,8137,Montseny,...,4.0,3.0,3.0,3.0,2.0,2.0,693,41.779280,2.358002,POINT (2.358002 41.77928)
2,8124009,Mollet del Vallès,25/01/2023,7,NO,µg/m3,traffic,suburban,8124,Mollet del Vallès,...,17.0,7.0,16.0,62.0,62.0,58.0,90,41.549183,2.212098,POINT (2.2120984 41.549183)
3,8114006,Martorell,25/01/2023,7,NO,µg/m3,background,suburban,8114,Martorell,...,2.0,4.0,7.0,7.0,3.0,1.0,78,41.475384,1.921202,POINT (1.9212021 41.475384)
4,8112003,Manlleu,25/01/2023,8,NO2,µg/m3,background,suburban,8112,Manlleu,...,22.0,35.0,38.0,36.0,32.0,28.0,460,42.003307,2.287299,POINT (2.2872992 42.003307)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3106369,8125002,Montcada i Reixac,01/01/1991,14,O3,µg/m3,traffic,suburban,8125,Montcada i Reixac,...,0.0,0.0,0.0,0.0,0.0,0.0,34,41.481972,2.188298,POINT (2.188298 41.481972)
3106370,8019004,Barcelona (Poblenou),01/01/1991,1,SO2,µg/m3,background,urban,8019,Barcelona,...,20.0,15.0,16.0,16.0,19.0,16.0,3,41.403878,2.204501,POINT (2.204501 41.403878)
3106371,8101001,L'Hospitalet de Llobregat,01/01/1991,6,CO,mg/m3,background,urban,8101,"Hospitalet de Llobregat, l'",...,1.9,1.5,1.6,1.6,1.1,1.0,29,41.370475,2.114999,POINT (2.114999 41.370475)
3106372,8125002,Montcada i Reixac,01/01/1991,7,NO,µg/m3,traffic,suburban,8125,Montcada i Reixac,...,71.0,157.0,167.0,204.0,136.0,74.0,34,41.481972,2.188298,POINT (2.188298 41.481972)
