<a href="https://colab.research.google.com/github/Er-Divyesh-Sethiya/AI-in-different-domain-/blob/main/E2E_predictive_modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prerequisite

### [Sign up](https://abacus.ai/app/signup?signupToken=e2e_ui) to get started with the Abacus.AI platform



# Useful Snippets for Demo


## SQL

In [None]:
-- Feature-engineering query for the red wine quality data:
--   total_acidity = fixed_acidity + volatile_acidity
--   final_ph      = ph - 0.22 * citric_acid
-- The other listed columns, including quality, are selected unchanged.
SELECT fixed_acidity+volatile_acidity AS total_acidity,
       ph-(0.22*citric_acid) AS final_ph,
       residual_sugar,
       chlorides,
       density,
       sulphates,
       alcohol,
       free_sulfur_dioxide,
       total_sulfur_dioxide,
       quality
  -- NOTE(review): the angle-bracketed name looks like a placeholder for a table name; replace it with your own table before running.
  FROM <sample_data_red_wine_quality_1662652291>

## Python

In [None]:
def transform_data(input_fg_df):
    """Add the derived ``final_ph`` and ``total_acidity`` columns to a wine-quality frame.

    Args:
        input_fg_df: pandas DataFrame containing the ``pH``, ``citric_acid``,
            ``fixed_acidity`` and ``volatile_acidity`` columns.

    Returns:
        A new DataFrame with every input column plus ``final_ph``
        (``pH - 0.22 * citric_acid``) and ``total_acidity``
        (``fixed_acidity + volatile_acidity``). The input frame is not modified,
        so the cell can be re-run and the raw frame reused safely.

    Raises:
        KeyError: if any of the four required columns is missing.
    """
    # assign() returns a copy, so the caller's DataFrame is left untouched.
    return input_fg_df.assign(
        final_ph=input_fg_df['pH'] - (0.22 * input_fg_df['citric_acid']),
        total_acidity=input_fg_df['fixed_acidity'] + input_fg_df['volatile_acidity'],
    )

# API Workbook (Optional)

In [None]:
!pip install abacusai

We'll also import pandas and pprint tools for visualization in this notebook.

In [None]:
import pandas as pd # A tool we'll use to download and preview CSV files
import pprint # A tool to pretty print dictionary outputs
# Shared pretty-printer (2-space indent); later cells use it to print dictionary outputs.
pp = pprint.PrettyPrinter(indent=2)

2. Add your Abacus.AI [API Key](https://abacus.ai/app/profile/apikey) generated using the API dashboard as follows:

In [None]:
#@title Abacus.AI API Key

# Enter the key through the form field at run time. Do not save, share or commit
# the notebook with a real key left in this cell.
api_key = ''  #@param {type: "string"}

3. Import the Abacus.AI library and instantiate a client.

In [None]:
from abacusai import ApiClient
# Client created from the API key set in the form cell; later cells issue their API calls through it.
client = ApiClient(api_key)

In [None]:
# Display the result of list_use_cases() as the cell output.
client.list_use_cases()

In [None]:
#@title Abacus.AI Use Case

# Use-case identifier passed to describe_use_case_requirements() and create_project() in later cells.
use_case = 'PREDICTING'  #@param {type: "string"}

By calling the `describe_use_case_requirements` method we can view what datasets are required for this use_case.

In [None]:
# Pretty-print every requirement of the selected use case as a dictionary.
for requirement in client.describe_use_case_requirements(use_case):
    pp.pprint(requirement.to_dict())

Finally, let's create the project.

In [None]:
# Create the project for the selected use case, then show its attributes as a dictionary.
lead_scoring_project = client.create_project(
    name='Predictive modeling',
    use_case=use_case,
)
lead_scoring_project.to_dict()

In [None]:
# Preview only the first rows of the sample CSV instead of dumping the whole frame as cell output.
pd.read_csv('https://s3.amazonaws.com//realityengines.exampledatasets/predicting/red_wine_quality.csv').head()

In [None]:
# Register the red wine quality CSV, read from S3 through the file connector, as a
# dataset with table name 'red_wine_quality'. A cron-style refresh schedule is passed along.
lead_scoring_dataset = client.create_dataset_from_file_connector(
    name='red wine quality',
    table_name='red_wine_quality',
    location='s3://realityengines.exampledatasets/predicting/red_wine_quality.csv',
    refresh_schedule='0 12 * * *',
)
datasets = [lead_scoring_dataset]

# Call wait_for_inspection() on every dataset before moving on.
for dataset in datasets:
    dataset.wait_for_inspection()

SQL feature group (FG): creation, training and deployment

In [None]:
# Create a feature group from SQL over the 'red_wine_quality' table. The query derives
# total_acidity and final_ph and selects the other listed columns unchanged.
# The adjacent string literals concatenate into a single SQL statement.
feature_group_sql = client.create_feature_group(
    table_name='Predictive_Lead_Scoring_SQL_FG',
    sql=(
        'SELECT fixed_acidity+volatile_acidity AS total_acidity,'
        'ph-(0.22*citric_acid) AS final_ph,'
        'residual_sugar,chlorides,density,sulphates,alcohol,'
        'free_sulfur_dioxide,total_sulfur_dioxide,quality'
        '  FROM red_wine_quality'
    ),
)

In [None]:
# Attach the SQL feature group to the project and register it with type CUSTOM_TABLE.
client.add_feature_group_to_project(
    feature_group_id=feature_group_sql.feature_group_id,
    project_id=lead_scoring_project.project_id,
)
client.set_feature_group_type(
    feature_group_id=feature_group_sql.feature_group_id,
    project_id=lead_scoring_project.project_id,
    feature_group_type="CUSTOM_TABLE",
)

# The schema is fetched here but not displayed (it is not the cell's last expression).
client.get_feature_group_schema(
    feature_group_id=feature_group_sql.feature_group_id,
)

# Map the 'quality' feature to TARGET, then validate the project with this feature group.
client.set_feature_mapping(
    project_id=lead_scoring_project.project_id,
    feature_group_id=feature_group_sql.feature_group_id,
    feature_name='quality',
    feature_mapping='TARGET',
)
lead_scoring_project.validate(
    feature_group_ids=[feature_group_sql.feature_group_id],
)



Train a model on the SQL feature group

In [None]:
# Query the training config options for the SQL feature group (the result is neither stored nor displayed).
lead_scoring_project.get_training_config_options(
    feature_group_ids=[feature_group_sql.feature_group_id],
)

# Start training with an empty training config on the SQL feature group.
lead_scoring_model = lead_scoring_project.train_model(
    training_config={},
    feature_group_ids=[feature_group_sql.feature_group_id],
)
print(lead_scoring_model.to_dict())

# Presumably blocks until the AutoML run has finished; confirm against the client docs.
lead_scoring_model.wait_for_full_automl()

Deploy the model trained on the SQL feature group

In [None]:
# Show the metrics of the trained model.
pp.pprint(lead_scoring_model.get_metrics().to_dict())

# Create a deployment token for the project; the prediction cell passes it to predict().
deployment_token = lead_scoring_project.create_deployment_token().deployment_token
print(deployment_token)

# Deploy the model and wait for the deployment.
lead_scoring_deployment = client.create_deployment(
    name='predicive Scoring Deployment',
    model_id=lead_scoring_model.model_id,
    description='predictive',
)
lead_scoring_deployment.wait_for_deployment()

In [None]:
# Build a feature group from the Python transform. Its input is the 'red_wine_quality'
# table registered by this notebook (the same table the SQL feature group reads), so the
# cell no longer depends on a table name that exists only in another workspace.
feature_group_python = client.create_feature_group_from_function(
    table_name="red_wine_quality_python2",
    function_source_code="def transform_data(input_fg_df):\n input_fg_df['final_ph'] = input_fg_df['pH']-(0.22*input_fg_df['citric_acid'])\n input_fg_df['total_acidity'] = input_fg_df['fixed_acidity']+input_fg_df['volatile_acidity']\n return input_fg_df",
    function_name="transform_data",
    input_feature_groups=["red_wine_quality"],
)

# Attach the Python feature group to the project, set its type and mark 'quality' as
# the TARGET. These are the same steps the SQL feature group goes through before training.
client.add_feature_group_to_project(
    feature_group_id=feature_group_python.feature_group_id,
    project_id=lead_scoring_project.project_id,
)
client.set_feature_group_type(
    feature_group_id=feature_group_python.feature_group_id,
    project_id=lead_scoring_project.project_id,
    feature_group_type="CUSTOM_TABLE",
)
client.set_feature_mapping(
    project_id=lead_scoring_project.project_id,
    feature_group_id=feature_group_python.feature_group_id,
    feature_name='quality',
    feature_mapping='TARGET',
)
lead_scoring_project.validate(feature_group_ids=[feature_group_python.feature_group_id])

# Train on the Python feature group.
lead_scoring_project.get_training_config_options(feature_group_ids=[feature_group_python.feature_group_id])
lead_scoring_model = lead_scoring_project.train_model(
    training_config={},
    feature_group_ids=[feature_group_python.feature_group_id],
)
print(lead_scoring_model.to_dict())
lead_scoring_model.wait_for_full_automl()

# Deploy the model trained on the Python feature group.
# NOTE: this rebinds lead_scoring_model, deployment_token and lead_scoring_deployment,
# so later cells see the Python-feature-group model rather than the SQL one.
pp.pprint(lead_scoring_model.get_metrics().to_dict())
deployment_token = lead_scoring_project.create_deployment_token().deployment_token
print(deployment_token)
lead_scoring_deployment = client.create_deployment(
    name='predicive Scoring Deployment python',
    model_id=lead_scoring_model.model_id,
    description='predictive',
)
lead_scoring_deployment.wait_for_deployment()


In [None]:
# Create a percent-sampled feature group.
# NOTE(review): the original cell passed the hard-coded ID "54695426e", which cannot exist
# in a freshly created workspace. The Python feature group built by this notebook is
# sampled instead because its columns match the raw column names used in the prediction
# query. Confirm this is the intended source.
feature_group_sampling = client.create_sampling_feature_group(
    feature_group_id=feature_group_python.feature_group_id,
    table_name="sample_fg1",
    sampling_config={
        "samplingMethod": "PERCENT_SAMPLING",
        "keyColumns": [],
        "samplePercent": 0.6,
    },
)

# Attach the sampled feature group to the project, set its type and mark 'quality' as
# the TARGET. These are the same steps the SQL feature group goes through before training.
client.add_feature_group_to_project(
    feature_group_id=feature_group_sampling.feature_group_id,
    project_id=lead_scoring_project.project_id,
)
client.set_feature_group_type(
    feature_group_id=feature_group_sampling.feature_group_id,
    project_id=lead_scoring_project.project_id,
    feature_group_type="CUSTOM_TABLE",
)
client.set_feature_mapping(
    project_id=lead_scoring_project.project_id,
    feature_group_id=feature_group_sampling.feature_group_id,
    feature_name='quality',
    feature_mapping='TARGET',
)
lead_scoring_project.validate(feature_group_ids=[feature_group_sampling.feature_group_id])

# Train on the sampled feature group.
lead_scoring_project.get_training_config_options(feature_group_ids=[feature_group_sampling.feature_group_id])
lead_scoring_model = lead_scoring_project.train_model(
    training_config={},
    feature_group_ids=[feature_group_sampling.feature_group_id],
)
print(lead_scoring_model.to_dict())
lead_scoring_model.wait_for_full_automl()

# Deploy the model trained on the sampled feature group.
# NOTE: this rebinds lead_scoring_model, deployment_token and lead_scoring_deployment;
# the prediction cell uses whichever deployment was created last.
pp.pprint(lead_scoring_model.get_metrics().to_dict())
deployment_token = lead_scoring_project.create_deployment_token().deployment_token
print(deployment_token)
lead_scoring_deployment = client.create_deployment(
    name='predictive model Deployment',
    model_id=lead_scoring_model.model_id,
    description='predictive',
)
lead_scoring_deployment.wait_for_deployment()

In [None]:
# Display the allowed feature mappings of the first requirement returned for the selected use case.
client.describe_use_case_requirements(use_case)[0].allowed_feature_mappings

## 6. Predict


In [None]:
# Request a prediction for one wine sample from the deployment currently held in
# lead_scoring_deployment. A fresh ApiClient() is created without an API key; the call
# passes the deployment token instead.
ApiClient().predict(
    deployment_token=deployment_token,
    deployment_id=lead_scoring_deployment.deployment_id,
    query_data={
        "fixed_acidity": 9.1,
        "volatile_acidity": 0.47,
        "citric_acid": 0.49,
        "residual_sugar": 2.6,
        "chlorides": 0.094,
        "free_sulfur_dioxide": 38,
        "total_sulfur_dioxide": 106,
        "density": 0.9982,
        "pH": 3.08,
        "sulphates": 0.59,
        "alcohol": 9.1,
    },
)