In [1]:
import numpy as np
import pandas as pd
import json
import requests
import urllib.parse
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random

### Test dengan data API

In [2]:
API_URL = "https://api.goapi.io/stock/idx/prices"
API_KEY = '833b2587-34b1-5172-801a-6be0cf21'
SYMBOL_URL = "https://api.goapi.io/stock/idx/trending"

In [3]:
def generate_saham_tren(api_url, api_key):
  API_URL_KODE = api_url
  headers = {"X-API-KEY": api_key}

  response = requests.get(API_URL_KODE, headers=headers).json()
  response_str = json.dumps(response)
  SYMBOL = pd.json_normalize(json.loads(response_str), record_path=['data', 'results'])
  SYMBOL = SYMBOL['symbol']

  SYMBOL_string = ','.join(SYMBOL)

  return SYMBOL_string

In [4]:
SYMBOL_string = generate_saham_tren(SYMBOL_URL, API_KEY)

In [5]:
SYMBOL_string

'GOTO,BUMI,DEWA,BRMS,WIKA,NATO,GZCO,AMMN,NCKL,BRPT,BBCA,BBRI,MDKA,BBYB,APIC,PGEO,MIDI,BMRI,KLBF,MEDC,TLKM,EMTK,ASII,TOWR,TPIA,CMNT,BBNI,AMRT,ADRO,MAPI,ANTM,ARTO,UNVR,PTBA,BREN,ISAT,FILM,CUAN,CPIN,INCO,INDF,PANI,UNTR,ICBP,TCPI'

In [6]:
def transform_json_to_df(api_url, symbol, api_key):
  API_URL = api_url
  SYMBOL = symbol
  API_KEY = api_key

  query_params = urllib.parse.urlencode({'symbols': SYMBOL})
  new_url = API_URL + '?' + query_params

  url = new_url
  headers = {"X-API-KEY": API_KEY}

  response = requests.get(url, headers=headers).json()
  response_str = json.dumps(response)
  df = pd.json_normalize(json.loads(response_str), record_path=['data', 'results'])
  df = df[['company.name', 'company.logo', 'symbol', 'date', 'open', 'high', 'low', 'close', 'volume']]
  df["hasil_mean"] = df.apply(lambda x: (x["open"] + x["high"] + x["low"] + x["close"]) / 4, axis=1)
  return df

In [7]:
df_json = transform_json_to_df(API_URL, SYMBOL_string, API_KEY)

In [8]:
df_json

Unnamed: 0,company.name,company.logo,symbol,date,open,high,low,close,volume,hasil_mean
0,Adaro Energy Indonesia Tbk.,https://s3.goapi.id/logo/ADRO.jpg,ADRO,2023-12-15,2490,2570,2490,2520,52881100,2517.5
1,Amman Mineral Internasional Tbk.,https://s3.goapi.id/logo/AMMN.jpg,AMMN,2023-12-15,6700,6800,6325,6325,338347300,6537.5
2,Sumber Alfaria Trijaya Tbk.,https://s3.goapi.id/logo/AMRT.jpg,AMRT,2023-12-15,2860,2880,2770,2800,57724800,2827.5
3,Aneka Tambang Tbk.,https://s3.goapi.id/logo/ANTM.jpg,ANTM,2023-12-15,1650,1670,1640,1670,34436800,1657.5
4,Pacific Strategic Financial Tbk.,https://s3.goapi.id/logo/APIC.jpg,APIC,2023-12-15,1170,1180,1150,1165,218847000,1166.25
5,Bank Jago Tbk.,https://s3.goapi.id/logo/ARTO.jpg,ARTO,2023-12-15,3310,3360,3150,3210,32270700,3257.5
6,Astra International Tbk.,https://s3.goapi.id/logo/ASII.jpg,ASII,2023-12-15,5625,5700,5600,5650,93580400,5643.75
7,Bank Central Asia Tbk.,https://s3.goapi.id/logo/BBCA.jpg,BBCA,2023-12-15,9125,9225,9075,9225,275013500,9162.5
8,Bank Negara Indonesia (Persero) Tbk.,https://s3.goapi.id/logo/BBNI.jpg,BBNI,2023-12-15,5425,5425,5275,5275,58451500,5350.0
9,Bank Rakyat Indonesia (Persero) Tbk.,https://s3.goapi.id/logo/BBRI.jpg,BBRI,2023-12-15,5575,5600,5550,5550,252448800,5568.75


In [9]:
df_json.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   company.name  45 non-null     object 
 1   company.logo  45 non-null     object 
 2   symbol        45 non-null     object 
 3   date          45 non-null     object 
 4   open          45 non-null     int64  
 5   high          45 non-null     int64  
 6   low           45 non-null     int64  
 7   close         45 non-null     int64  
 8   volume        45 non-null     int64  
 9   hasil_mean    45 non-null     float64
dtypes: float64(1), int64(5), object(4)
memory usage: 3.6+ KB


In [10]:
df_json.describe()

Unnamed: 0,open,high,low,close,volume,hasil_mean
count,45.0,45.0,45.0,45.0,45.0,45.0
mean,3842.466667,3931.377778,3783.288889,3849.088889,304080900.0,3851.555556
std,4119.806238,4226.127775,4069.528639,4126.902618,667837600.0,4134.80878
min,60.0,64.0,55.0,61.0,7726800.0,60.0
25%,995.0,1020.0,990.0,1000.0,23165500.0,1001.25
50%,2490.0,2570.0,2490.0,2520.0,93580400.0,2517.5
75%,5575.0,5600.0,5550.0,5550.0,252448800.0,5568.75
max,21500.0,21950.0,21275.0,21275.0,3854647000.0,21500.0


In [11]:
df_json.isnull().sum()

company.name    0
company.logo    0
symbol          0
date            0
open            0
high            0
low             0
close           0
volume          0
hasil_mean      0
dtype: int64

In [12]:
df_json.duplicated().sum()

0

### Model Content-based Recommendation
Merekomendasikan item yang mirip dengan item similarity degree. Content-based filtering akan mempelajari data saham pada mean berdasarkan objek data similarity degree.

In [13]:
# Inisialisasi TfidfVectorizer
tfv = TfidfVectorizer()
# Melakukan perhitungan idf pada data cuisine
tfv.fit(df_json['hasil_mean'].astype(str))

Selanjutnya, melakukan fit dan transformasi atribut data saham yaitu hasil_mean ke dalam bentuk matriks.

In [14]:
# Melakukan fit lalu ditransformasikan ke bentuk matrix
tfidf_matrix = tfv.fit_transform(df_json['hasil_mean'].astype(str))

# Melihat ukuran matrix tfidf
tfidf_matrix.shape

(45, 47)

Untuk menghasilkan vektor tf-idf dalam bentuk matriks, menggunakan fungsi [todense()](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.todense.html)

In [15]:
tfidf_matrix.todense()

matrix([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [16]:

# Membuat dataframe untuk melihat tf-idf matrix
# Kolom diisi dengan hasil mean
# Baris diisi dengan kode saham

pd.DataFrame(
    tfidf_matrix.todense(),
    columns = tfv.get_feature_names_out(),
    index   = df_json.symbol
).sample(20, axis=1).sample(10, axis=0)

Unnamed: 0_level_0,9562,5068,1278,433,4841,75,92,1053,1001,2517,247,3257,6537,1582,1690,2827,5643,5406,21500,89
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
PANI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.820359,0.0,0.0
CUAN,0.0,0.0,0.0,0.0,0.0,0.480154,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
APIC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BBNI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BUMI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.820359
MAPI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
INDF,0.0,0.0,0.0,0.0,0.0,0.480154,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ASII,0.0,0.0,0.0,0.0,0.0,0.480154,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.877184,0.0,0.0,0.0
AMRT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
ARTO,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Cosine Similiarity
Selanjutnya, menghitung derajat kesamaan (similarity degree) antar hasil mean dengan teknik cosine similarity menggunakan fungsi [cosine_similarity](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.cosine_similarity.html) dari library sklearn.

In [17]:
# Menghitung cosine similarity pada matrix tf-idf
cosine_sim = cosine_similarity(tfidf_matrix)
cosine_sim

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [18]:
# Membuat dataframe dari variabel cosine_sim dengan baris dan kolom berupa kode saham
cosine_sim_df = pd.DataFrame(
    cosine_sim,
    columns=df_json['symbol'],
    index=df_json['symbol']
)

print('Shape:', cosine_sim_df.shape)

# Melihat similarity matrix pada setiap hasil mean
cosine_sim_df.sample(5, axis=1).sample(10, axis=0)

Shape: (45, 45)


symbol,ASII,ISAT,NATO,TOWR,ARTO
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
PTBA,0.0,0.0,0.0,0.0,0.0
BRPT,0.0,0.0,0.0,0.0,0.0
BRMS,0.230548,0.0,0.230548,0.0,0.0
CPIN,0.230548,0.0,0.230548,0.0,0.0
PGEO,0.230548,0.0,0.230548,0.0,0.0
INCO,0.0,0.0,0.0,0.0,0.0
ASII,1.0,0.0,0.230548,0.0,0.0
ISAT,0.0,1.0,0.0,0.0,0.0
UNVR,0.0,0.0,0.0,0.0,0.0
BBYB,0.230548,0.0,0.230548,0.0,0.0


In [19]:
def model_function(df_json):
  # Inisialisasi TfidfVectorizer
  tfv = TfidfVectorizer()
  # Melakukan perhitungan idf pada data cuisine
  tfv.fit(df_json['hasil_mean'].astype(str))
  tfidf_matrix = tfv.fit_transform(df_json['hasil_mean'].astype(str))
  tfidf_matrix.todense()
  cosine_sim = cosine_similarity(tfidf_matrix)
  cosine_sim_df = pd.DataFrame(
    cosine_sim,
    columns=df_json['symbol'],
    index=df_json['symbol']
  )
  return cosine_sim_df

### Recommendation Testing

In [20]:
cosine_sim_df

symbol,ADRO,AMMN,AMRT,ANTM,APIC,ARTO,ASII,BBCA,BBNI,BBRI,...,PANI,PGEO,PTBA,TCPI,TLKM,TOWR,TPIA,UNTR,UNVR,WIKA
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADRO,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMMN,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMRT,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ANTM,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
APIC,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.327011,0.0,0.0,0.0,0.0,0.327011,0.327011,0.0,0.0,0.0
ARTO,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ASII,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.230548,...,0.0,0.230548,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BBCA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BBNI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BBRI,0.0,0.0,0.0,0.0,0.0,0.0,0.230548,0.0,0.0,1.0,...,0.0,0.230548,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Membuat fungsi saham_recommendations untuk menampilkan data kode saham yang direkomendasikan oleh algoritma sistem yang telah dibuat, dengan parameter masukan berupa kode saham yang akan di dinisiasi pada similarity degree.

In [69]:

def saham_recommendations(hasil_mean, similarity_data=cosine_sim_df, items=df_json, k=10):
    # Mengambil data dengan menggunakan argpartition untuk melakukan partisi secara tidak langsung sepanjang sumbu yang diberikan
    # Dataframe diubah menjadi numpy
    # Range(start, stop, step)
    saham_max = df_json[df_json.hasil_mean.eq(df_json["hasil_mean"].max())]
    if hasil_mean == "Tidak ada rekomendasi Saham, Pengeluaran anda terlalu banyak":
      return "Tidak ada rekomendasi Saham, Pengeluaran anda terlalu banyak"
    elif (hasil_mean == saham_max.loc[:, 'symbol'].to_string(index=False)):
      df_sorted = df_json.sort_values(by="hasil_mean", ascending=False).head(5)
      return df_sorted
    else:
      index = similarity_data.loc[:,hasil_mean].to_numpy().argpartition(range(-1, -k, -1))
      # Mengambil data dengan similarity terbesar dari index yang ada
      closest = similarity_data.columns[index[-1:-(k+2):-1]]

      return pd.DataFrame(closest).merge(items).head(k).drop_duplicates()

In [70]:
df_json['symbol']

0     ADRO
1     AMMN
2     AMRT
3     ANTM
4     APIC
5     ARTO
6     ASII
7     BBCA
8     BBNI
9     BBRI
10    BBYB
11    BMRI
12    BREN
13    BRMS
14    BRPT
15    BUMI
16    CMNT
17    CPIN
18    CUAN
19    DEWA
20    EMTK
21    FILM
22    GOTO
23    GZCO
24    ICBP
25    INCO
26    INDF
27    ISAT
28    KLBF
29    MAPI
30    MDKA
31    MEDC
32    MIDI
33    NATO
34    NCKL
35    PANI
36    PGEO
37    PTBA
38    TCPI
39    TLKM
40    TOWR
41    TPIA
42    UNTR
43    UNVR
44    WIKA
Name: symbol, dtype: object

In [71]:
df_json['hasil_mean']

0      2517.50
1      6537.50
2      2827.50
3      1657.50
4      1166.25
5      3257.50
6      5643.75
7      9162.50
8      5350.00
9      5568.75
10      485.75
11     5937.50
12     7300.00
13      178.75
14     1602.50
15       89.25
16      937.50
17     5068.75
18    12393.75
19       60.00
20      575.00
21     3810.00
22       92.75
23      151.75
24    10525.00
25     4202.50
26     6343.75
27     9562.50
28     1582.50
29     1690.00
30     2352.50
31     1141.25
32      433.00
33      191.75
34     1053.75
35     5406.25
36     1278.75
37     2337.50
38     7875.00
39     3950.00
40     1001.25
41     4841.25
42    21500.00
43     3432.50
44      247.00
Name: hasil_mean, dtype: float64

In [85]:
def transform_data(data_pemasukan, data_pengeluaran, data_saham):

  # Inisialisasi variabel testing
  # data pemasukan dan pengeluaran selama seminggu
  pemasukan_mean = np.median(data_pemasukan)
  pengeluaran_mean = np.median(data_pengeluaran)
  perbandingan = pemasukan_mean - pengeluaran_mean


  if (perbandingan < 0):
    return "Tidak ada rekomendasi Saham, Pengeluaran anda terlalu banyak"
  else:
    # Cari baris yang sesuai
    diff = perbandingan - data_saham["hasil_mean"]

    if (diff.max() > data_saham["hasil_mean"].max()):
      data_max = data_saham["hasil_mean"].max()

      saham_max = data_saham[data_saham.hasil_mean.eq(data_max)]

      data_selected_saham_max = saham_max.loc[:, 'symbol'].to_string(index=False)
      return data_selected_saham_max
    else:
      idx = diff <= 10

      actual_df = data_saham[idx]
      data_selected = actual_df['hasil_mean'].min()

      saham_selected = data_saham[data_saham.hasil_mean.eq(data_selected)]

      data_selected_saham = saham_selected.loc[:, 'symbol'].to_string(index=False)
      return data_selected_saham

In [94]:
data_pemasukan = [500000000, 800000000, 4000000, 4500000, 90000, 80000000, 75000]
data_pengeluaran = [70000, 30000000, 45000000, 45000, 93000, 77000, 80000000]

data_selected_saham = transform_data(data_pemasukan, data_pengeluaran, df_json)

In [95]:
data_selected_saham

'UNTR'

In [96]:
df_json['hasil_mean'].max()

21500.0

### Hasil rekomendasi

In [97]:
saham_recommendations(data_selected_saham)

Unnamed: 0,company.name,company.logo,symbol,date,open,high,low,close,volume,hasil_mean
42,United Tractors Tbk.,https://s3.goapi.id/logo/UNTR.jpg,UNTR,2023-12-15,21500,21950,21275,21275,9804200,21500.0
18,Petrindo Jaya Kreasi Tbk.,https://s3.goapi.id/logo/CUAN.jpg,CUAN,2023-12-15,12000,13050,11725,12800,16586300,12393.75
24,Indofood CBP Sukses Makmur Tbk.,https://s3.goapi.id/logo/ICBP.jpg,ICBP,2023-12-15,10575,10675,10425,10425,8340900,10525.0
27,Indosat Tbk.,https://s3.goapi.id/logo/ISAT.jpg,ISAT,2023-12-15,9575,9675,9500,9500,20213000,9562.5
7,Bank Central Asia Tbk.,https://s3.goapi.id/logo/BBCA.jpg,BBCA,2023-12-15,9125,9225,9075,9225,275013500,9162.5


10 Saham (sesuai banyak k) yang direkomendasikan bersesuaian dengan kode saham dari Cosine Similiarity

akurasi nya 10/10 * 100 = 100%

### Test deploy

In [98]:
!pip install dill

Collecting dill
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.7


In [99]:
import dill

In [100]:
C = 1
output_file = f'model_recomendation={C}:v2.bin'

In [101]:
f_out = open(output_file, 'wb')
dill.dump((generate_saham_tren, transform_json_to_df, transform_data, model_function), f_out)
f_out.close()

In [102]:
!ls -lh *.bin

-rw-r--r-- 1 root root 2.5K Dec 16 13:21 'model_recomendation=1:v2.bin'


Save the modeland test with new session

In [2]:
!pip install dill

Collecting dill
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.7


In [3]:
import numpy as np
import pandas as pd
import json
import requests
import urllib.parse
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random
import dill

In [4]:
API_URL = "https://api.goapi.io/stock/idx/prices"
API_KEY = '833b2587-34b1-5172-801a-6be0cf21'
SYMBOL_URL = "https://api.goapi.io/stock/idx/trending"

In [7]:
input_file = '/content/model_recomendation=1_v2.bin'

In [8]:
with open(input_file, 'rb') as f_in:
    generate_saham_tren, transform_json_to_df, transform_data, model = dill.load(f_in)

In [9]:
SYMBOL_string = generate_saham_tren(SYMBOL_URL, API_KEY)

In [10]:
SYMBOL_string

'GOTO,BUMI,DEWA,BRMS,WIKA,NATO,GZCO,AMMN,NCKL,BRPT,BBCA,BBRI,MDKA,BBYB,APIC,PGEO,MIDI,BMRI,KLBF,MEDC,TLKM,EMTK,ASII,TOWR,TPIA,CMNT,BBNI,AMRT,ADRO,MAPI,ANTM,ARTO,UNVR,PTBA,BREN,ISAT,FILM,CUAN,CPIN,INCO,INDF,PANI,UNTR,ICBP,TCPI'

In [11]:
data_saham = transform_json_to_df(API_URL, SYMBOL_string, API_KEY)

In [12]:
data_saham

Unnamed: 0,company.name,company.logo,symbol,date,open,high,low,close,volume,hasil_mean
0,Adaro Energy Indonesia Tbk.,https://s3.goapi.id/logo/ADRO.jpg,ADRO,2023-12-15,2490,2570,2490,2520,52881100,2517.5
1,Amman Mineral Internasional Tbk.,https://s3.goapi.id/logo/AMMN.jpg,AMMN,2023-12-15,6700,6800,6325,6325,338347300,6537.5
2,Sumber Alfaria Trijaya Tbk.,https://s3.goapi.id/logo/AMRT.jpg,AMRT,2023-12-15,2860,2880,2770,2800,57724800,2827.5
3,Aneka Tambang Tbk.,https://s3.goapi.id/logo/ANTM.jpg,ANTM,2023-12-15,1650,1670,1640,1670,34436800,1657.5
4,Pacific Strategic Financial Tbk.,https://s3.goapi.id/logo/APIC.jpg,APIC,2023-12-15,1170,1180,1150,1165,218847000,1166.25
5,Bank Jago Tbk.,https://s3.goapi.id/logo/ARTO.jpg,ARTO,2023-12-15,3310,3360,3150,3210,32270700,3257.5
6,Astra International Tbk.,https://s3.goapi.id/logo/ASII.jpg,ASII,2023-12-15,5625,5700,5600,5650,93580400,5643.75
7,Bank Central Asia Tbk.,https://s3.goapi.id/logo/BBCA.jpg,BBCA,2023-12-15,9125,9225,9075,9225,275013500,9162.5
8,Bank Negara Indonesia (Persero) Tbk.,https://s3.goapi.id/logo/BBNI.jpg,BBNI,2023-12-15,5425,5425,5275,5275,58451500,5350.0
9,Bank Rakyat Indonesia (Persero) Tbk.,https://s3.goapi.id/logo/BBRI.jpg,BBRI,2023-12-15,5575,5600,5550,5550,252448800,5568.75


In [13]:
model = model(data_saham)

In [14]:
model

symbol,ADRO,AMMN,AMRT,ANTM,APIC,ARTO,ASII,BBCA,BBNI,BBRI,...,PANI,PGEO,PTBA,TCPI,TLKM,TOWR,TPIA,UNTR,UNVR,WIKA
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADRO,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMMN,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AMRT,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ANTM,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
APIC,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.327011,0.0,0.0,0.0,0.0,0.327011,0.327011,0.0,0.0,0.0
ARTO,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ASII,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.230548,...,0.0,0.230548,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BBCA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BBNI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BBRI,0.0,0.0,0.0,0.0,0.0,0.0,0.230548,0.0,0.0,1.0,...,0.0,0.230548,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
data_pemasukan = [50000, 80000, 40000000, 45000, 90000, 80000, 75000000]
data_pengeluaran = [70000, 30000000, 45000, 45000, 93000, 77000, 80000000]

test = transform_data(data_pemasukan, data_pengeluaran, data_saham)

In [35]:
test

'ARTO'

In [36]:

def saham_recommendations(hasil_mean, similarity_data=model, items=data_saham, k=10):
    # Mengambil data dengan menggunakan argpartition untuk melakukan partisi secara tidak langsung sepanjang sumbu yang diberikan
    # Dataframe diubah menjadi numpy
    # Range(start, stop, step)
    saham_max = data_saham[data_saham.hasil_mean.eq(data_saham["hasil_mean"].max())]
    if hasil_mean == "Tidak ada rekomendasi Saham, Pengeluaran anda terlalu banyak":
      return "Tidak ada rekomendasi Saham, Pengeluaran anda terlalu banyak"
    elif (hasil_mean == saham_max.loc[:, 'symbol'].to_string(index=False)):
      df_sorted = data_saham.sort_values(by="hasil_mean", ascending=False).head(5)
      return df_sorted
    else:
      index = similarity_data.loc[:,hasil_mean].to_numpy().argpartition(range(-1, -k, -1))
      # Mengambil data dengan similarity terbesar dari index yang ada
      closest = similarity_data.columns[index[-1:-(k+2):-1]]

      return pd.DataFrame(closest).merge(items).head(k).drop_duplicates()

# def saham_recommendations(hasil_mean, similarity_data=model, items=data_saham, k=10):
#     # Mengambil data dengan menggunakan argpartition untuk melakukan partisi secara tidak langsung sepanjang sumbu yang diberikan
#     # Dataframe diubah menjadi numpy
#     # Range(start, stop, step)
#     if hasil_mean == "Tidak ada rekomendasi Saham, Pengeluaran anda terlalu banyak":
#       return "Tidak ada rekomendasi Saham, Pengeluaran anda terlalu banyak"
#     else:
#       index = similarity_data.loc[:,hasil_mean].to_numpy().argpartition(range(-1, -k, -1))
#       # Mengambil data dengan similarity terbesar dari index yang ada
#       closest = similarity_data.columns[index[-1:-(k+2):-1]]

#       return pd.DataFrame(closest).merge(items).head(k).drop_duplicates()

In [37]:
saham_recommendations(test)

Unnamed: 0,symbol,company.name,company.logo,date,open,high,low,close,volume,hasil_mean
0,ARTO,Bank Jago Tbk.,https://s3.goapi.id/logo/ARTO.jpg,2023-12-15,3310,3360,3150,3210,32270700,3257.5
1,WIKA,Wijaya Karya (Persero) Tbk.,https://s3.goapi.id/logo/WIKA.jpg,2023-12-15,238,282,228,240,618522200,247.0
2,BRPT,Barito Pacific Tbk.,https://s3.goapi.id/logo/BRPT.jpg,2023-12-15,1600,1670,1530,1610,300345200,1602.5
3,BUMI,Bumi Resources Tbk.,https://s3.goapi.id/logo/BUMI.jpg,2023-12-15,87,93,86,91,1968494000,89.25
4,CMNT,Cemindo Gemilang Tbk.,https://s3.goapi.id/logo/CMNT.jpg,2023-12-15,915,960,915,960,58793500,937.5
5,BRMS,Bumi Resources Minerals Tbk.,https://s3.goapi.id/logo/BRMS.jpg,2023-12-15,180,181,177,177,876995300,178.75
6,FILM,MD Pictures Tbk.,https://s3.goapi.id/logo/FILM.jpg,2023-12-15,3740,3890,3740,3870,16608000,3810.0
7,DEWA,Darma Henwa Tbk,https://s3.goapi.id/logo/DEWA.jpg,2023-12-15,60,64,55,61,1677057000,60.0
8,EMTK,Elang Mahkota Teknologi Tbk.,https://s3.goapi.id/logo/EMTK.jpg,2023-12-15,580,595,555,570,102288700,575.0
9,CUAN,Petrindo Jaya Kreasi Tbk.,https://s3.goapi.id/logo/CUAN.jpg,2023-12-15,12000,13050,11725,12800,16586300,12393.75


### Test Flask

In [None]:
# !pip install flask

In [None]:
# !pip install pyngrok

In [None]:
# !ngrok authtoken 2Z5qKCbKRO5vainLb7Rwi2vWF9s_2sPbwVc5KEaNQt4yZwoFz

In [None]:
# global data_selected_saham
# # Import library
# from flask import Flask
# from pyngrok import ngrok
# import json

# import os
# import threading

# # Inisialisasi Flask
# app = Flask(__name__)
# port = "5000"

# public_url = ngrok.connect(port).public_url
# print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:{}\"".format(public_url, port))

# @app.route('/')
# def index():
#   return 'hello world'


# @app.route('/saham')
# def recommendations():
#     # Muat model
#     with open('model.pkl', 'rb') as f:
#         cosine_sim_df = pickle.load(f)

#     # Lakukan prediksi
#     recommendations = saham_recommendations(data_selected_saham, similarity_data=cosine_sim_df)

#     # Kembalikan hasil prediksi
#     return json.dumps(recommendations.to_dict(orient='records'))

# # Jalankan Flask
# if __name__ == '__main__':
#     threading.Thread(target=app.run, kwargs={"use_reloader": False}).start()