In [1]:
from ollama import chat
from ollama import ChatResponse
import torch
import json
import os

In [2]:
def load_json(filepath):
    """Read a JSON document from ``filepath`` and return the parsed value.

    The file is decoded explicitly as UTF-8 instead of the platform's locale
    encoding, so JSON containing non-ASCII text loads the same way on every
    operating system.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The parsed JSON value, or ``None`` when the file does not exist or
        does not contain valid JSON. In both failure cases an error message
        is printed instead of raising.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {filepath}")
        return None
    return data

In [1]:
def save_json(data, filepath):
    """Write ``data`` to ``filepath`` as JSON indented by 4 spaces.

    The payload is serialized to a string *before* the target file is opened.
    If ``data`` is not JSON-serializable, the error is therefore reported
    without truncating or partially overwriting a file that already exists.

    Args:
        data: Any JSON-serializable Python value.
        filepath: Destination path; the file is created or overwritten.

    Returns:
        None. Success or failure is reported by printing a message; errors
        are caught and printed rather than raised.
    """
    try:
        # Serialize first so a failure here never touches the file on disk.
        serialized = json.dumps(data, indent=4)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print(f"Data saved successfully to {filepath}")
    except Exception as e:
        print(f"Error saving data to {filepath}: {e}")

In [4]:
# Registry of system prompts, filled in below under a language-name key (e.g. "Scala").
system_prompt_dict = dict()

In [5]:
# System prompt for the Scala run. It instructs the model to answer with a
# single JSON object that lists the snippet's components (type, name, code,
# description) plus an overall description, and nothing else.
# NOTE(review): the template below contains a `/* ... */` placeholder, which is
# not valid JSON; if the model copies it literally the response will not parse
# with json.loads. Consider describing "more components" in the instructions
# instead - TODO confirm against actual model outputs.
system_prompt_dict["Scala"] = """
You are an expert Scala code analyst. Your task is to dissect a given Scala code snippet and provide a structured JSON response detailing all of its components and overall functionality. You must strictly adhere to the following JSON format:
{
  "output": {
    "programming_language": "Scala",
    "components": [
      {
        "component_type": "<TYPE_OF_SCALA_COMPONENT>",
        "component_name": "<NAME_OF_COMPONENT_IF_APPLICABLE>",
        "component_code": "<THE_ACTUAL_SCALA_CODE_OF_THE_COMPONENT>",
        "component_description": "<DETAILED_DESCRIPTION_OF_COMPONENT_FUNCTIONALITY>"
      },
      { /* ... more components ... */ }
    ],
    "overall_description": "<DETAILED_SUMMARY/DESCRIPTION_OF_THE_ENTIRE_SCALA_CODE_FUNCTIONALITY>"
  }
}

**Instructions:**
1. Identify the Programming Language: The script is written in Scala.
2. Component Types: Use appropriate component types, for example, OBJECT_DEFINITION, CLASS_DEFINITION, TRAIT_DEFINITION, CASE_CLASS_DEFINITION, FUNCTION_DEFINITION, VAL_DECLARATION, VAR_DECLARATION, FOR_COMPREHENSION, PATTERN_MATCHING, IMPLICIT_CONVERSION, LAMBDA_EXPRESSION.
3. Component Names: Provide the correct identifier for each component (function name, class name, variable name, etc.), or NULL if not applicable.
4. Component Code: Include the complete, unmodified Scala code for each component.
5. Component Descriptions: Provide a detailed, technical explanation of what each component does, including Scala-specific features like pattern matching, immutability, higher-order functions, and functional programming concepts.
6. Overall Description: Provide a detailed summary of the entire Scala code, explaining its purpose, architecture, and how components interact.
7. Strict JSON Output: Your ENTIRE response must be ONLY the valid JSON object. Do not include any explanations, introductions, or additional text outside the JSON structure.

Analyze the following Scala code properly and return ONLY the JSON response with no additional text:
"""

In [6]:
def string_to_json(input_string):
    """Parse ``input_string`` as JSON, reporting failures instead of raising.

    Args:
        input_string: Text (``str``, ``bytes`` or ``bytearray``) expected to
            hold a JSON document.

    Returns:
        A ``(value, error)`` tuple: ``(parsed_value, None)`` on success, or
        ``(None, message)`` when the input is not valid JSON or is not a
        string-like object at all (for example ``None``).
    """
    try:
        return json.loads(input_string), None
    except (json.JSONDecodeError, TypeError) as e:
        # json.loads raises TypeError for non str/bytes/bytearray input; fold
        # that into the same error tuple so callers get one failure contract.
        return None, str(e)

In [7]:
def make_data(data, system_prompt, model_name="qwen2.5-coder:32b"):
    """Send every entry of ``data`` to the chat model and collect the replies.

    Args:
        data: Mapping of sample key -> text sent as the user message.
        system_prompt: Text sent as the system message of every request.
        model_name: Model identifier passed to ``chat``. Defaults to the model
            this notebook was originally hard-wired to.

    Returns:
        A dict with the same keys as ``data``; each value is
        ``{"input": <user text>, "output": <content of the model's reply>}``.

    A "Processed: <key>" line is printed after each request completes.
    """
    output_data = {}

    for key, value in data.items():
        response: ChatResponse = chat(model=model_name, messages=[
            {
                'role': 'system',
                'content': system_prompt,
            },
            {
                'role': 'user',
                'content': value,
            }
        ])

        output_data[key] = {
            "input": value,
            "output": response['message']['content'],
        }

        print("Processed:", key)

    return output_data

In [8]:
# Load the full training set and keep only its first `num_keys` entries.
# load_json returns None on failure, which the isinstance check below rejects.
data = load_json("scala_train.json")

num_keys = 25
if not isinstance(data, dict):
    raise TypeError("Input must be a dictionary.")

if not isinstance(num_keys, int) or num_keys < 0:
    raise ValueError("num_keys must be a non-negative integer.")

# dicts preserve insertion order, so slicing the item list keeps the first
# `num_keys` entries in file order (or all of them if there are fewer).
sliced_dict = dict(list(data.items())[:num_keys])

# The full dataset is no longer needed once the sample has been taken.
del data

In [9]:
# Sanity check: report how many samples were selected.
print(f"Length of sliced dictionary: {len(sliced_dict)}")

Length of sliced dictionary: 25


In [10]:
# Run the model over the selected samples using the Scala system prompt.
output = make_data(data=sliced_dict, system_prompt=system_prompt_dict["Scala"])

Processed: scala_0
Processed: scala_1
Processed: scala_2
Processed: scala_3
Processed: scala_4
Processed: scala_5
Processed: scala_6
Processed: scala_7
Processed: scala_8
Processed: scala_9
Processed: scala_10
Processed: scala_11
Processed: scala_12
Processed: scala_13
Processed: scala_14
Processed: scala_15
Processed: scala_16
Processed: scala_17
Processed: scala_18
Processed: scala_19
Processed: scala_20
Processed: scala_21
Processed: scala_22
Processed: scala_23
Processed: scala_24


In [11]:
# Print each prompt/response pair so the responses can be inspected by eye.
separator = "-" * 80
for record in output.values():
    print(f"Input: \n{record['input']}\n")
    print(f"Output: \n{record['output']}\n")
    print(separator)

Input: 
package helpers

import org.specs2.mutable._
import com.ruimo.recoeng.RecoEngApi
import com.ruimo.recoeng.json.{JsonResponseHeader, OnSalesJsonResponse, SalesItem, TransactionMode, TransactionSalesMode, SortOrder, JsonRequestPaging, Desc, Asc, ScoredItem}
import com.ruimo.recoeng.json.RecommendByItemJsonResponse
import play.api.libs.json.{JsSuccess, JsResult}
import models.LoginSession
import org.mockito.Mockito.mock
import models.PersistedTransaction
import models.TransactionLogItem
import models.TransactionLogCoupon
import models.TransactionLogHeader
import models.TransactionType
import models.Address
import models.ItemName
import helpers.Helper._
import com.ruimo.scoins.Scoping._

class RecommendEngineSpec extends Specification {
  "Recommend engine" should {
    "Can send transaction" in {
      val api: RecoEngApi = new RecoEngApi {
        def onSales(
          requestTime: Long,
          sequenceNumber: Long,
          transactionMode: TransactionMode,
          transa

In [12]:
import pickle

# Checkpoint the collected responses in data.pickle so they can be reloaded
# later without calling the model again.
with open("data.pickle", "wb") as pickle_file:
    pickle.dump(output, pickle_file)

In [2]:
import pickle

# Restore the checkpointed responses from data.pickle.
# Caution: pickle.load can execute arbitrary code, so only load pickle files
# that come from a trusted source.
with open("data.pickle", "rb") as pickle_file:
    loaded_output = pickle.load(pickle_file)

print("Loaded output from pickle file:")
divider = "-" * 80
for i, record in enumerate(loaded_output.values()):
    print(f"Input {i}: \n{record['input']}\n")
    print(f"Output {i}: \n{record['output']}\n")
    print(divider)

Loaded output from pickle file:
Input 0: 
package helpers

import org.specs2.mutable._
import com.ruimo.recoeng.RecoEngApi
import com.ruimo.recoeng.json.{JsonResponseHeader, OnSalesJsonResponse, SalesItem, TransactionMode, TransactionSalesMode, SortOrder, JsonRequestPaging, Desc, Asc, ScoredItem}
import com.ruimo.recoeng.json.RecommendByItemJsonResponse
import play.api.libs.json.{JsSuccess, JsResult}
import models.LoginSession
import org.mockito.Mockito.mock
import models.PersistedTransaction
import models.TransactionLogItem
import models.TransactionLogCoupon
import models.TransactionLogHeader
import models.TransactionType
import models.Address
import models.ItemName
import helpers.Helper._
import com.ruimo.scoins.Scoping._

class RecommendEngineSpec extends Specification {
  "Recommend engine" should {
    "Can send transaction" in {
      val api: RecoEngApi = new RecoEngApi {
        def onSales(
          requestTime: Long,
          sequenceNumber: Long,
          transactionMode:

In [3]:
# Manual fix-up for "scala_4": cut the response 2 characters before the
# trailing "This JSON structure capture..." text (the 2 extra characters are
# presumably the separator preceding it - TODO confirm).
# find() is used instead of index() so that re-running this cell after the
# text has already been removed is a reported no-op rather than a ValueError.
scala_4_output = loaded_output["scala_4"]["output"]
scala_4_cut = scala_4_output.find("This JSON structure capture")
if scala_4_cut != -1:
    # max(..., 0) keeps the slice end from going negative for a marker at position 0 or 1.
    loaded_output["scala_4"]["output"] = scala_4_output[:max(scala_4_cut - 2, 0)]
else:
    print('Marker not found in "scala_4" output; leaving it unchanged.')

In [4]:
# Manual fix-up for "scala_15": cut the response 2 characters before the first
# "```json" fence (the 2 extra characters are presumably the separator
# preceding it - TODO confirm).
# find() is used instead of index() so that re-running this cell after the
# fence has already been removed is a reported no-op rather than a ValueError.
scala_15_output = loaded_output["scala_15"]["output"]
scala_15_cut = scala_15_output.find("```json")
if scala_15_cut != -1:
    # max(..., 0) keeps the slice end from going negative for a marker at position 0 or 1.
    loaded_output["scala_15"]["output"] = scala_15_output[:max(scala_15_cut - 2, 0)]
else:
    print('Marker not found in "scala_15" output; leaving it unchanged.')

In [5]:
# Manual fix-up for "scala_23": cut the response 1 character before the first
# "```" fence (the extra character is presumably the newline preceding it -
# TODO confirm).
# find() is used instead of index() so that re-running this cell after the
# fence has already been removed is a reported no-op rather than a ValueError.
scala_23_output = loaded_output["scala_23"]["output"]
scala_23_cut = scala_23_output.find("```")
if scala_23_cut != -1:
    # max(..., 0) keeps the slice end from going negative for a marker at position 0.
    loaded_output["scala_23"]["output"] = scala_23_output[:max(scala_23_cut - 1, 0)]
else:
    print('Marker not found in "scala_23" output; leaving it unchanged.')

In [None]:
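# Disabled fix-up kept for reference. The "rust_17" key does not match the
# scala_* keys used in this notebook, so this looks like a leftover from
# another run (TODO: confirm and remove).
# loaded_output["rust_17"]["output"] = loaded_output["rust_17"]["output"] + "\n}"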

In [6]:
# Samples rejected during manual verification, given as 0-based positions in
# loaded_output's iteration order. Everything else is copied into final_data.
remove_ind_num = [0, 3, 5, 7, 10, 11, 13, 20]

final_data = {}
for position, key in enumerate(loaded_output):
    if position in remove_ind_num:
        continue
    print(f"Keeping index {position} in output.")
    final_data[key] = loaded_output[key]

Keeping index 1 in output.
Keeping index 2 in output.
Keeping index 4 in output.
Keeping index 6 in output.
Keeping index 8 in output.
Keeping index 9 in output.
Keeping index 12 in output.
Keeping index 14 in output.
Keeping index 15 in output.
Keeping index 16 in output.
Keeping index 17 in output.
Keeping index 18 in output.
Keeping index 19 in output.
Keeping index 21 in output.
Keeping index 22 in output.
Keeping index 23 in output.
Keeping index 24 in output.


In [7]:
# Display the keys of the samples that survived the manual filtering.
final_data.keys()

dict_keys(['scala_1', 'scala_2', 'scala_4', 'scala_6', 'scala_8', 'scala_9', 'scala_12', 'scala_14', 'scala_15', 'scala_16', 'scala_17', 'scala_18', 'scala_19', 'scala_21', 'scala_22', 'scala_23', 'scala_24'])

In [8]:
# Write the manually verified samples to a JSON file.
# json is imported in this cell as well because save_json uses the module-level
# `json` name (presumably the top import cell was not run in the resumed
# session - TODO confirm; the import is harmless either way).
import json

output_json_path = "final_scala_data.json"
# save_json already catches Exception and prints its own error message, so an
# additional try/except around this call could never trigger and was removed.
save_json(final_data, output_json_path)

Data saved successfully to final_scala_data.json
