In [5]:
# Basic data analysis libraries
import pandas as pd

# Text pre-processing libraries
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import os
import azureml.core

from azureml.core import Experiment

from azureml.core import Run
from azureml.core import Workspace, Dataset, Model
import joblib

In [3]:
from azureml.core.authentication import InteractiveLoginAuthentication
# Identifiers of the Azure AD tenant and the Azure ML workspace this notebook talks to.
# You can find tenant id under azure active directory->properties
TENANT_ID = '2b3cdc74-0511-49a7-92e7-00d8abdf2be4'
SUBSCRIPTION_ID = 'd069e33b-af31-45d3-89cf-0e3064b3889b'
RESOURCE_GROUP = 'societree-nmit-2021'
WORKSPACE_NAME = 'societree-nmit-2021'

# Interactive (browser based) login against the tenant, then fetch the workspace handle.
ia = InteractiveLoginAuthentication(tenant_id=TENANT_ID)
ws = Workspace.get(
    name=WORKSPACE_NAME,
    subscription_id=SUBSCRIPTION_ID,
    resource_group=RESOURCE_GROUP,
    auth=ia,
)

Note, we have launched a browser for you to login. For old experience with device code, use "az login --use-device-code"


Performing interactive authentication. Please follow the instructions on the terminal.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


In [4]:
# Load the NGO catalogue; the displayed frame has the columns "Unnamed: 0", "Name" and "Description".
ngo_details_csv = './data_from_azure/ngo_details.csv'
df = pd.read_csv(ngo_details_csv)
df

Unnamed: 0,Name,Description
0,Aavi Rural and Urban Development Organisation ...,social services urban rural karnataka adults f...
1,Arsh Yog Avam Jan Kalyan Trust,health urban rural all india adolescents adult...
2,BALLSY GROUP,social services urban rural assam adults
3,BAMONGRAM RESHAM KHADI PRATISTHAN,social services urban rural west bengal accide...
4,BARRACKPUR BELA MEMORIAL CHARITABLE WELFARE SO...,health rural west bengal children dalits rural...
...,...,...
2572,Women Organisation for Rural Development (WORD),social services rural odisha women
2573,WOMENITE,social services urban rural all india adolesce...
2574,Women'S India Trust,development and housing urban rural maharashtr...
2575,WWH ( WE WILL HELP) CHARITABLE FOUNDATION,social services urban rural maharashtra accide...


In [7]:
# Turn every NGO description into a TF-IDF vector (one row per NGO).
# min_df=1 keeps every term that occurs in at least one document, which is what the
# former integer value 0 meant as well; recent scikit-learn releases validate the
# parameter and reject an integer min_df below 1.
tfidf = TfidfVectorizer(analyzer='word', min_df=1)

vec_arr = tfidf.fit_transform(df['Description'])

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2 in favour of
# get_feature_names_out(); fall back to the old name so older installations keep working.
# NOTE: the new method returns a NumPy array instead of a list.
if hasattr(tfidf, 'get_feature_names_out'):
    vec_features = tfidf.get_feature_names_out()
else:
    vec_features = tfidf.get_feature_names()

# Pairwise cosine similarity between all descriptions: sim_mat[i][j] compares row i with row j.
sim_mat = cosine_similarity(vec_arr, vec_arr)

# Lookup helpers: row -> NGO name, and NGO name -> row label of df.
titles = df['Name']
indices = pd.Series(df.index, index=df['Name'])

In [12]:
# Bundle the similarity matrix, the NGO names and the name -> row lookup into one tuple
# and persist it as a single pickle file.
our_model = (sim_mat, titles, indices)
model_file = './pickle_file/ngo_rec_model.pkl'

# joblib.dump does not create missing parent directories, so make sure the target exists.
os.makedirs(os.path.dirname(model_file), exist_ok=True)

# compress=0 and protocol=None are joblib's defaults, and cache_size is documented as a
# deprecated argument without effect, so none of them needs to be passed.
joblib.dump(our_model, filename=model_file)

['./pickle_file/ngo_rec_model.pkl']

In [None]:
#Driver Function for getting recommendations

def get_recommendations(name, top_n=5):
    """Return the names of the NGOs whose descriptions are most similar to ``name``.

    Relies on the module-level ``indices`` (NGO name -> row), ``sim_mat``
    (pairwise similarity matrix) and ``titles`` (row -> NGO name).

    Parameters
    ----------
    name : str
        NGO name exactly as it appears in the index of ``indices``.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    numpy.ndarray
        Up to ``top_n`` NGO names, most similar first. The queried NGO itself
        is never part of the result.

    Raises
    ------
    KeyError
        If ``name`` is not present in ``indices``.
    """
    idx = indices[name]
    # Several NGOs sharing the same name make the lookup return a Series of rows;
    # use the first match so that sim_mat[idx] stays a single row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Rank every row by its similarity to the queried NGO, best match first.
    ranked = sorted(enumerate(sim_mat[idx]), key=lambda pair: pair[1], reverse=True)

    # Drop the queried NGO by position rather than assuming it is ranked first:
    # with tied scores (e.g. identical descriptions) another row can precede it.
    ngo_indices = [pos for pos, _ in ranked if pos != idx][:max(top_n, 0)]
    return titles.iloc[ngo_indices].values

In [13]:
#registering model

# Upload the pickled recommender to the workspace's model registry.
registered_model_path = os.path.join('.', 'pickle_file/ngo_rec_model.pkl')
registration_tags = {'subject': "NMIT-Hack-2021", 'type': "TF-IDF custom"}

model = Model.register(
    workspace=ws,
    model_name='recommender-model',
    model_path=registered_model_path,
    tags=registration_tags,
    description="",
)

Registering model recommender-model


In [24]:
# Folder used as the base of the scoring-script path built in the next cell.
# NOTE(review): the dependency-file cell assigns this name again (with a trailing slash).
experiment_folder = "./pickle_file"

In [25]:
# Path of the entry script that is handed to the inference configuration at deployment time.
scoring_script_name = "earlierscoringscript.py"
script_file = os.path.join(experiment_folder, scoring_script_name)
script_file

'./pickle_file/earlierscoringscript.py'

In [17]:
# Create the YAML file listing the dependencies and their version numbers

from azureml.core.conda_dependencies import CondaDependencies 

# Start from the default conda specification and add scikit-learn on top of it.
myenv = CondaDependencies()
myenv.add_conda_package('scikit-learn')

# Location of the environment file.
experiment_folder = "./pickle_file/"
env_file = os.path.join(experiment_folder, "NGO_Rec_dependancies.yml")

# Serialise the specification and store it as a .yml file.
conda_spec = myenv.serialize_to_string()
with open(env_file, "w") as yml_out:
    yml_out.write(conda_spec)
print("Saved dependency info in", env_file)

# Read the file back and show exactly what was written to disk.
with open(env_file, "r") as yml_in:
    saved_spec = yml_in.read()
print(saved_spec)

Saved dependency info in ./pickle_file/NGO_Rec_dependancies.yml
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
    # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults

- scikit-learn
channels:
- anaconda
- conda-forge



In [28]:
# Deploy the registered Azure ML model so that it can be consumed as a REST API endpoint

from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig

# Scoring environment: the entry script plus the conda file describing its dependencies.
inference_config = InferenceConfig(
    runtime="python",
    entry_script=script_file,
    conda_file=env_file,
)

# Resources requested for the Azure Container Instance hosting the service.
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service_name = "ngo-rec-service-nmit"

# Create the web service from the registered model.
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)

# Block until the deployment has finished (streaming progress), then report the final state.
service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running..........................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy
