# Create a File Import Job Using the Arize GraphQL API

### Step 1: Initialize the GraphQL Client using your developer API key

In [None]:
!pip install gql[all]
from gql import Client, gql
from gql.transport.requests import RequestsHTTPTransport

### Get your API key
First, make sure you have developer permissions. If you are able to visit the [API explorer](https://app.arize.com/graphql), then you have developer permissions. If not, please ask your Account Admin to provide you with access. 

The API key can be retrieved from the [API explorer](https://app.arize.com/graphql) page. Click the button on the top right called "Get Your API Key." A modal will pop up with your key; copy it into the `API_KEY` constant below.

NOTE: this key is different from the SDK key used to send data to Arize.

In [None]:
# Developer API key copied from the API explorer page.
API_KEY = "YOUR_API_KEY"

# HTTP transport pointed at the Arize GraphQL endpoint; the key is passed
# in the `x-api-key` header.
transport = RequestsHTTPTransport(
    headers={"x-api-key": API_KEY},
    url="https://app.arize.com/graphql/",
)

# GraphQL client built on top of that transport, with schema fetching
# from the transport enabled.
client = Client(fetch_schema_from_transport=True, transport=transport)

### Step 2: Declare the mutation inputs
The inputs below represent the creation of an import job for one particular sample use case. For an exhaustive list of mutation inputs and their descriptions, please consult the docs in the [API explorer](https://app.arize.com/graphql).

In [None]:
# SPACE_ID is the globally unique ID of your space. To find it, visit
# app.arize.com and read it from the URL, which has the form:
#   https://app.arize.com/organizations/:orgId/spaces/:spaceId
# NOTE: the space ID is NOT the space key used to send data with the SDK.
SPACE_ID = "YOUR_SPACE_ID"

# Model that the imported inferences belong to.
MODEL_NAME = "YOUR_MODEL_NAME"
MODEL_TYPE = "YOUR_MODEL_TYPE"
MODEL_ENVIRONMENT_NAME = "YOUR_MODEL_ENVIRONMENT"

# Location of the files to import.
BLOB_STORE = "YOUR_BLOB_STORE"
BUCKET_NAME = "YOUR_STORAGE_BUCKET_NAME"
PREFIX = "YOUR_FILE_PATH"

# SCHEMA maps the file contents to the model inferences. For details on this
# mapping, see:
#   https://docs.arize.com/arize/data-ingestion/object-store-integration/file-schema
# The example below describes a file with prediction id, prediction label,
# timestamp, and actual label columns. Because no "features" field is
# declared, all other non-reserved columns are inferred to be features.
SCHEMA = {
    "predictionId": "YOUR_PREDICTION_ID_COLUMN_NAME",
    "predictionLabel": "YOUR_PREDICTION_LABEL_COLUMN_NAME",
    "timestamp": "YOUR_TIMESTAMP_COLUMN_NAME",
    "actualLabel": "YOUR_ACTUAL_LABEL_COLUMN_NAME",
}

### Step 3: Dry run the import job to make sure that it is set up correctly (optional, but recommended)
To learn more about the dry run mechanism, please consult our file importer data ingestion [docs](https://docs.arize.com/arize/data-ingestion/object-store-integration/file-importer-data-ingestion-faq).

In [None]:
# Re-usable mutation that validates an import job without creating it.
# `dryRun: true` is hard-coded in the mutation input below, so the import job
# is tested WITHOUT writing any changes to the server and no import job is
# created by running it.
# With dryRun set to true, the createFileImportJob response is a
# validationResult object.

create_file_import_job_dry_run = gql(
    """
     mutation createNewImportJob(
       $spaceId: ID!, 
       $modelName: String!,
       $modelType: ModelType!,
       $modelEnvironmentName: ModelEnvironmentName!,
       $blobStore: BlobStoreType!,
       $bucketName: String!,
       $prefix: String!,
       $schema: FileImportSchemaInputType!,
     ) {
        createFileImportJob(
          input: {
            spaceId: $spaceId,
            modelName: $modelName,
            modelType: $modelType,
            modelEnvironmentName: $modelEnvironmentName,
            blobStore: $blobStore,
            bucketName: $bucketName,
            prefix: $prefix,
            schema: $schema,
            dryRun: true,
          }
        ) {
          validationResult { 
            validationStatus 
            filePath 
            error {
              message
            } 
          }
        }
     }
    """
)

# Variable values for the mutation; each key matches a `$variable` declared
# in the mutation signature above.
params = {
    "spaceId": SPACE_ID,
    "modelName": MODEL_NAME,
    "modelType": MODEL_TYPE,
    "modelEnvironmentName": MODEL_ENVIRONMENT_NAME,
    "blobStore": BLOB_STORE,
    "bucketName": BUCKET_NAME,
    "prefix": PREFIX,
    "schema": SCHEMA,
}

result = client.execute(create_file_import_job_dry_run, params)

# Pull the nested validation payload out once instead of re-indexing the
# response for every field.
validation_result = result["createFileImportJob"]["validationResult"]
print(
    f'The validation status of the dry run: {validation_result["validationStatus"]}'
)

# On a failed dry run, report the offending file path and the error message.
if validation_result["validationStatus"] == "fail":
    print(
        f'{validation_result["filePath"]}: {validation_result["error"]["message"]}'
    )

### Step 4: Create the import job after a successful dry run

In [None]:
# Mutation that actually creates the import job. It is the dry-run mutation
# with the `dryRun` input removed (setting it to false is equivalent).
# When dryRun is false or excluded, the response of the createFileImportJob
# mutation is a space and job object rather than a validationResult; the
# selection set below only requests the id of the created fileImportJob.

create_file_import_job = gql(
    """
     mutation createNewImportJob(
       $spaceId: ID!, 
       $modelName: String!,
       $modelType: ModelType!,
       $modelEnvironmentName: ModelEnvironmentName!,
       $blobStore: BlobStoreType!,
       $bucketName: String!,
       $prefix: String!,
       $schema: FileImportSchemaInputType!,
     ) {
        createFileImportJob(
          input: {
            spaceId: $spaceId,
            modelName: $modelName,
            modelType: $modelType,
            modelEnvironmentName: $modelEnvironmentName,
            blobStore: $blobStore,
            bucketName: $bucketName,
            prefix: $prefix,
            schema: $schema,
          }
        ) {
          fileImportJob { id }
        }
     }
    """
)

# Variable values for the mutation; rebuilt here so this cell can be run
# without re-running the dry-run cell.
params = {
    "spaceId": SPACE_ID,
    "modelName": MODEL_NAME,
    "modelType": MODEL_TYPE,
    "modelEnvironmentName": MODEL_ENVIRONMENT_NAME,
    "blobStore": BLOB_STORE,
    "bucketName": BUCKET_NAME,
    "prefix": PREFIX,
    "schema": SCHEMA,
}

result = client.execute(create_file_import_job, params)
# Plain string literal: the message has no placeholders, so no f-prefix.
print("✅ You have successfully created your file import job")