# Config for testing 

## Pick a config - default is to use the dev admin keys and admin account on local host 

In [4]:
from __future__ import annotations
from typing import Dict, List 
import os, sys, time 
from pathlib import Path 
data_files = Path(os.getcwd()) / "data"


# LOCALHOST Testing 
# Dev bypass creds for localhost - doesn't depend on creating an api key 
os.environ['FEATRIX_CLIENT_ID'] = 'bd5ec45d-1c22-49fb-9b14-b3b13b428c68'
os.environ['FEATRIX_CLIENT_SECRET'] = '4e7cddd7-dbdc-4f90-9a71-4b54cdec754e'

target_api_server = "http://localhost:3001"
allow_unencrypted_http = True 

# STAGE Testing -- you will need an API Key to be in ~/.featrix.key 
# target_api_server = "https://stage.featrix.com"
# allow_unencrypted_http = False

# PRODUCTION Testing -- you will need an API key in ~/.featrix.key or set it below 
# target_api_server = "https://app.featrix.com"
# allow_unencrypted_http = False

# If you have a key in the key file:
# if 'FEATRIX_CLIENT_ID' in os.environ:
#     del os.environ['FEATRIX_CLIENT_ID']
# if 'FEATRIX_CLIENT_SECRET' in os.environ:
#     del os.environ['FEATRIX_CLIENT_SECRET']

# Otherwise be sure that you have FEATRIX_CLIENT_ID and FEATRIX_CLIENT_SECRET set in the process that started the notebook 

import featrixclient as ft

FEATRIX_CLIENT_ID     = os.environ.get('FEATRIX_CLIENT_ID')
FEATRIX_CLIENT_SECRET = os.environ.get('FEATRIX_CLIENT_SECRET')
fc = ft.networkclient.new_client(
                target_api_server,
                client_id=FEATRIX_CLIENT_ID,
                client_secret=FEATRIX_CLIENT_SECRET,
                allow_unencrypted_http=allow_unencrypted_http,  # DEBUG
)


#  Create Neural Function

In [3]:

wh_data = data_files / "weight-height.csv"
fc.create_project("NNF WeightHeight Test 1")
fc.upload_file(wh_data, associate=True)
# This will create an es and a model -- because we are waiting for completion, it will also wait for the upload to be ready for training.  Otherwise we could use:
# while self.current_project.ready() is False: 
#     time.sleep(5)
nf, es_job, nf_job = fc.create_neural_function(target_fields="Gender", wait_for_completion=True)
# Since we waited for completion, the nf will be ready for predicitons. If we hadn't we would need to watch the two jobs for job.finished and job.error 
# Now do a prediction 
query = {'Weight': 170}
x = nf.predict(query)
print(f"Prediction query {query}: {x}")
fc.display_embedding_explorer()



Step 2/2:  Running: training; 100.0% complete; epoch = 75/75; batch = 250/250; current loss = 0.345, validation loss: 0.3160651922225952


AttributeError: from_job

# Create Neural Function Single Line

In [None]:
animals_small = data_files / "animals-1k.csv"

# This will create a project named CatDogSmall, make it the current project and wait for the training to complete. It will return the es and job 
es, job = fc.create_neural_function(target_fields="Animal", project="CatDogSmall", files=[animals_small], wait_for_completion=True)
# This will display the embedding explorer for the current projects only embedding space.  We could also call via:
# fc.display_embedding_explorer(embedding_space=es)
fc.display_embedding_explorer()

# Create Embedding Space 

In [None]:
animals = data_files / "animals.csv"

# fc.create_project("myfriends2")
# fc.upload_file(f1, associate=True)



#fc.create_neural_function(target_fields="hex_code", wait_for_completion=True)
fc.create_project("myfriends3")
fc.upload_file(animals, associate=True)
while not fc.current_project.ready():
    print(f"Waiting for upload in project {fc.current_project.name} to be processed...")
es, job = fc.create_embedding_space(name="Animals-large-dataset")
while job.finished is not True:
    print(f"Waiting for es {es.name} to be finished training...(sleep 10)")
    time.sleep(10)
    job = job.check()
es = es.by_id(es.id)
fc.display_embedding_explorer(embedding_space=es)

# Create Embedding Space Short

In [None]:
gh_data = data_files / "gh-train.csv"
hw_data = data_files / "hw-train.csv"

es, job = fc.create_embedding_space(name="GH-HW Embedding", project="GH Train Test Project", files=[gh_data, hw_data], wait_for_completion=True)
fc.display_embedding_explorer(embedding_space=es)

# Featrix CSV Loader 

In [None]:
#  -*- coding: utf-8 -*-
#
#  Copyright (c) 2024 Featrix, Inc, All Rights Reserved
#
#  Proprietary and Confidential.  Unauthorized use, copying or dissemination
#  of these materials is strictly prohibited.
#
import csv
import os
import traceback
from csv import Dialect
from io import StringIO
from pathlib import Path

import pandas as pd


def _find_bad_line_number(file_path: Path | str = None, buffer: bytes | str = None):
    try:
        if file_path:
            buffer = file_path.read_text()

        reader = csv.reader(buffer)
        linenumber = 1
        try:
            for row in reader:
                linenumber += 1
        except Exception as e:
            return linenumber
    except:
        pass
    return -1


# A wrapper for dealing with CSV files.
def featrix_wrap_pd_read_csv(
    file_path: str | Path = None, buffer: bytes | str = None, on_bad_lines="skip"
):
    """
    If you want to split CSVs in your notebook and so on when working
    with Featrix, this function should be used to capture the extra work
    around pandas' `pd.read_csv` that you'll want for best performance
    with Featrix. We will add split and a way to get back the test df
    to the client in a future release.

    Any column with an 'int' type -- meaning there doesn't seem to be a
    header line in the CSV -- will be renamed to `column_N`.

    Parameters
    ----------
    file_path : str
        Path to the CSV on your local system.
    buffer: str or bytes
        The CSV already in buffer
    on_bad_lines: str
        What to do with bad lines. By default, we 'skip', but you may want to 'error'.
        This is passed directly to `pd.read_csv`.

    This can raise exceptions if the file is not found or seems to be empty.

    """
    if not file_path and not buffer:
        raise ValueError(
            "No data provided via buffer or path to featrix_wrap_pd_read_csv"
        )
    if file_path:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"No such file {file_path}")
        # get the size of the file
        sz = os.path.getsize(file_path)
        if sz == 0:
            raise Exception(f"The file {file_path} appears to be 0 bytes long.")
    elif isinstance(buffer, bytes):
        buffer = buffer.decode()
    buffer_io = StringIO(buffer) if buffer else None
    dialect = None
    has_header = True

    sniffer = csv.Sniffer()
    if buffer:
        dialect = sniffer.sniff(buffer)
        has_header = sniffer.has_header(buffer)
    else:
        with open(file_path, newline="", errors='ignore') as csvfile:
            # For some very wide files, 2K isn't enough.
            # It's possible 256K isn't either, but one has to draw the line!
            try:
                sample = csvfile.read(32 * 1024)
            # except UnicodeDecodeError as err:
                # print("bad unicode:",dir(err))
                # print("err.reason: ", err.reason)
                # print("err.start: ", err.start)
                # print("err.end: ", err.end)
            except:
                bad_line = _find_bad_line_number(file_path=file_path, buffer=buffer)
                if bad_line > 0:
                    print("first BAD LINE WAS ...", bad_line)

            dialect = sniffer.sniff(sample)
            has_header = sniffer.has_header(sample)

    csv_parameters = {
        'delimiter': dialect.delimiter,
        'quotechar': dialect.quotechar,
        'escapechar': dialect.escapechar,
        'doublequote': dialect.doublequote,
        'skipinitialspace': dialect.skipinitialspace,
        'quoting': dialect.quoting,
        # Pandas does not support line terminators > 1 but Sniffer returns things like '\r\n'
        # 'lineterminator': dialect.lineterminator
    }

    if has_header:
        try:
            df = pd.read_csv(
                file_path or buffer_io,
                # Pandas doesn't take the same dialect as csv.Sniffer produces so we create csv_parameters
                # dialect=dialect,
                on_bad_lines=on_bad_lines,
                encoding_errors='ignore',
                **csv_parameters
            )
        except csv.Error as err:
            bad_line = _find_bad_line_number(file_path=file_path, buffer=buffer)
            if bad_line > 0:
                print("first BAD LINE WAS ...", bad_line)
            s_err = str(err)
            print(s_err)
            # FIXME: Not sure if there is something we can do if the buffer is hosed?
            if (
                s_err is not None
                and s_err.find("malformed") >= 0
                and file_path is not None
            ):
                df = pd.read_csv(
                    file_path,
                    # Pandas doesn't take the same dialect as csv.Sniffer produces so we create csv_parameters
                    # dialect=dialect,
                    on_bad_lines=on_bad_lines,
                    lineterminator="\n",
                    **csv_parameters
                )
                print("recovered")
            else:
                print("c'est la vie")
                raise err
            # endif

        # if any of the columns have an 'int' type, rename it.
        if df is not None:
            cols = list(df.columns)
            renames = {}
            for idx, c in enumerate(cols):
                if not isinstance(c, str):
                    renames[c] = "column_" + str(c)
            if len(renames) > 0:
                df.rename(columns=renames, inplace=True)

        return df

    if not has_header:
        # Try again -- and see.

        try:
            df = pd.read_csv(file_path or buffer_io,  **csv_parameters)
            cols = df.columns
            if len(cols) >= 0:
                if cols[0].startswith("Unnamed"):
                    # still no good.
                    raise Exception(
                        f"CSV file {file_path} doesn't seem to have a header line, which means it does not "
                        "have labels for the columns. This will make creating predictions on "
                        "specific targets difficult!"
                    )
            return df
        except Exception as err:  # noqa - catch anything
            traceback.print_exc()
            raise Exception(
                f"CSV file {file_path} doesn't seem to have a header line, which means it does not "
                "have labels for the columns. This will make creating predictions on specific targets difficult! [2]"
            )

    return None


In [6]:
fc.projects()
print(len(fc._projects))
resp = fc.current_project.delete()
print(f"delete response is type {type(resp)}")
print(resp.model_dump_json(indent=4))

0


AttributeError: 'NoneType' object has no attribute 'delete'

In [5]:
fc.get_uploads()
print(len(fc._uploads))

0


In [3]:
for key in fc._uploads.keys():
    upload = fc._uploads[key]
    ans = upload.delete()
    print(ans.model_dump_json(indent=4))

{
    "id": "6632a328e1f9dcaea6e0fe28",
    "created_by": "663298d5e1f9dcaea6e0fdd9",
    "created_at": "2024-05-01T20:16:40.402000",
    "updated_at": "2024-05-01T20:16:44.820000",
    "filename": "es_test_one-8cd4f897-2882-4353-9bf1-5032d855872d.csv",
    "pathname": "uploads/6632a328e1f9dcaea6e0fe28",
    "organization_id": "663298f53c2a7e3b97029a30",
    "num_rows": 1000,
    "num_cols": 3,
    "column_names": [
        "Animal",
        "height",
        "weight"
    ],
    "file_hash": "7509633fd50d20b141855d6c72872624",
    "load_errors": {},
    "post_processing_job_id": null,
    "sample_data": [
        {
            "Animal": "Dog",
            "height": 22.595827584633547,
            "weight": 16.338498948252546
        },
        {
            "Animal": "Dog",
            "height": 21.298301220167097,
            "weight": 15.178274588494544
        },
        {
            "Animal": "Cat",
            "height": 18.379075235911092,
            "weight": 8.106349997086319


# Code From UI Example 

In [None]:
FEATRIX_CLIENT_ID     = os.environ.get('FEATRIX_CLIENT_ID')
FEATRIX_CLIENT_SECRET = os.environ.get('FEATRIX_CLIENT_SECRET')

def predict_Loan_Status(
    input: Dict | List[Dict]
):
    """
    Call this function with a list of records or
    a single record as a dictionary.

    Returns an array of predictions.
    """
    client = ft.networkclient.new_client(
                'http://localhost:3001',  # DEBUG
                client_id=FEATRIX_CLIENT_ID,
                client_secret=FEATRIX_CLIENT_SECRET,
                allow_unencrypted_http=True,  # DEBUG
    )
    nf_id = '66227cf75749129f0aa3ea20'
    nf = client.get_neural_function(nf_id)
    x = nf.predict(input)
    return x


print(predict_Loan_Status({'weight': 20}))
