In [1]:
from ollama import chat
from ollama import ChatResponse
import torch
import json
import os

In [2]:
def load_json(filepath):
    """Read and parse a JSON document from ``filepath``.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The parsed JSON value, or ``None`` when the file does not exist or
        does not contain valid JSON (an error message is printed in both
        cases).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {filepath}")
        return None

In [5]:
def save_json(data, filepath):
    """Serialize ``data`` as indented JSON and write it to ``filepath``.

    The payload is serialized *before* the target file is opened, so a
    non-serializable object no longer truncates or half-writes an existing
    file. Failures are reported by printing a message rather than raising.

    Args:
        data: JSON-serializable object to store.
        filepath: Destination path; the file is overwritten on success.

    Returns:
        ``True`` if the file was written, ``False`` if serialization or the
        write failed.
    """
    try:
        # Serialize first: open(..., 'w') truncates the target immediately.
        payload = json.dumps(data, indent=4)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(payload)
    except Exception as e:
        print(f"Error saving data to {filepath}: {e}")
        return False
    print(f"Data saved successfully to {filepath}")
    return True

In [4]:
# Registry of system prompts, keyed by programming-language name (e.g. "C#").
system_prompt_dict = {}

In [5]:
# System prompt for C# samples: tells the model to answer with a single JSON
# object listing the snippet's components plus an overall description.
system_prompt_dict["C#"] = """
You are an expert C# code analyst. Your task is to dissect a given C# code snippet and provide a structured JSON response detailing all of its components and overall functionality. You must strictly adhere to the following JSON format:
{
  "output": {
    "programming_language": "C#",
    "components": [
      {
        "component_type": "<TYPE_OF_CSHARP_COMPONENT>",
        "component_name": "<NAME_OF_COMPONENT_IF_APPLICABLE>",
        "component_code": "<THE_ACTUAL_CSHARP_CODE_OF_THE_COMPONENT>",
        "component_description": "<DETAILED_DESCRIPTION_OF_COMPONENT_FUNCTIONALITY>"
      },
      { /* ... more components ... */ }
    ],
    "overall_description": "<DETAILED_SUMMARY/DESCRIPTION_OF_THE_ENTIRE_CSHARP_CODE_FUNCTIONALITY>"
  }
}

**Instructions:**
1. Identify the Programming Language: The script is written in C#.
2. Component Types: Use appropriate component types, for example, USING_DIRECTIVE, NAMESPACE_DECLARATION, CLASS_DEFINITION, STRUCT_DEFINITION, INTERFACE_DEFINITION, ENUM_DEFINITION, METHOD_DEFINITION, PROPERTY_DEFINITION, CONSTRUCTOR, DESTRUCTOR, MAIN_METHOD, LOOP, CONDITIONAL_STATEMENT, EVENT_DEFINITION, DELEGATE_DEFINITION, ASYNC_METHOD.
3. Component Names: Provide the correct identifier for each component (method name, class name, variable name, etc.), or NULL if not applicable.
4. Component Code: Include the complete, unmodified C# code for each component.
5. Component Descriptions: Provide a detailed, technical explanation of what each component does, including object-oriented features, data structures, event handling, threading, and interactions.
6. Overall Description: Provide a detailed summary of the entire C# code, explaining its purpose, architecture, and how components interact.
7. Strict JSON Output: Your ENTIRE response must be ONLY the valid JSON object. Do not include any explanations, introductions, or additional text outside the JSON structure.

Analyze the following C# code properly and return ONLY the JSON response with no additional text:
"""

In [6]:
def string_to_json(input_string):
    """Parse ``input_string`` as JSON without raising on malformed input.

    Returns:
        A ``(value, error)`` pair: ``(parsed_value, None)`` on success, or
        ``(None, message)`` where ``message`` is the decoder's error text.
    """
    try:
        parsed = json.loads(input_string)
    except json.JSONDecodeError as decode_error:
        return None, str(decode_error)
    return parsed, None

In [7]:
def make_data(data, system_prompt, model_name="qwen2.5-coder:32b"):
    """Send every entry of ``data`` to an Ollama chat model and collect replies.

    One chat request is issued per entry: ``system_prompt`` is sent as the
    system message and the entry's value as the user message.

    Args:
        data: Mapping of sample key to the user-message content (in this
            notebook, the C# snippets loaded from the training JSON).
        system_prompt: System message sent with every request.
        model_name: Ollama model tag to query. Defaults to the tag that was
            previously hard-coded inside the loop.

    Returns:
        dict mapping each key of ``data`` to
        ``{"input": <original value>, "output": <model reply text>}``,
        in the iteration order of ``data``.
    """
    output_data = {}

    for key, value in data.items():
        response: ChatResponse = chat(model=model_name, messages=[
            {
                'role': 'system',
                'content': system_prompt,
            },
            {
                'role': 'user',
                'content': value,
            }
        ])

        output_data[key] = {
            "input": value,
            "output": response['message']['content']
        }

        # Progress marker: each request can take a while.
        print("Processed:", key)

    return output_data

In [8]:
# load_json returns None when the file is missing or malformed; the type
# check below turns that into an explicit error.
data = load_json("csharp_train.json")

# Number of leading samples to send to the model in this run.
num_keys = 25
if not isinstance(data, dict):
    raise TypeError("Input must be a dictionary.")

if not isinstance(num_keys, int) or num_keys < 0:
    raise ValueError("num_keys must be a non-negative integer.")

# Keep the first `num_keys` entries, preserving the file's key order.
sliced_dict = dict(list(data.items())[:num_keys])

# The full dataset is no longer needed once the subset has been taken.
del data

In [9]:
# Sanity check: confirm how many samples were selected.
print(f"Length of sliced dictionary: {len(sliced_dict)}")

Length of sliced dictionary: 25


In [10]:
# Issues one model request per selected sample.
output = make_data(data=sliced_dict, system_prompt=system_prompt_dict["C#"])

Processed: csharp_0
Processed: csharp_1
Processed: csharp_2
Processed: csharp_3
Processed: csharp_4
Processed: csharp_5
Processed: csharp_6
Processed: csharp_7
Processed: csharp_8
Processed: csharp_9
Processed: csharp_10
Processed: csharp_11
Processed: csharp_12
Processed: csharp_13
Processed: csharp_14
Processed: csharp_15
Processed: csharp_16
Processed: csharp_17
Processed: csharp_18
Processed: csharp_19
Processed: csharp_20
Processed: csharp_21
Processed: csharp_22
Processed: csharp_23
Processed: csharp_24


In [14]:
# Print each snippet next to the model's reply for manual inspection.
for record in output.values():
    print(f"Input: \n{record['input']}\n")
    print(f"Output: \n{record['output']}\n")
    print("-" * 80)

Input: 
@{
    var total = 0;
    var totalMessage = "";
    @* a multiline
      razor comment embedded in csharp *@
    if (IsPost) {

        // Retrieve the numbers that the user entered.
        var num1 = Request["text1"];
        var num2 = Request["text2"];

        // Convert the entered strings into integers numbers and add.
        total = num1.AsInt() + num2.AsInt();
		<italic><bold>totalMessage = "Total = " + total;</bold></italic>
    }
}

<!DOCTYPE html>
<html lang="en">
  <head>
    <title>Add Numbers</title>
    <meta charset="utf-8" />
  </head>
<body>
  <p>Enter two whole numbers and then click <strong>Add</strong>.</p>
  <form action="" method="post">
    <p><label for="text1">First Number:</label>
      <input type="text" name="text1" />
    </p>
    <p><label for="text2">Second Number:</label>
      <input type="text" name="text2" />
    </p>
    <p><input type="submit" value="Add" /></p>
  </form>

	@* now we call the totalMessage method
	   (a multi line razor c

In [16]:
import pickle

# Persist the model responses to disk; a later cell reloads them from this file.
with open("data.pickle", "wb") as pickle_file:
    pickle.dump(output, pickle_file)

In [1]:
import pickle

# Reload the responses written to data.pickle.
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
with open("data.pickle", "rb") as pickle_file:
    loaded_output = pickle.load(pickle_file)

print("Loaded output from pickle file:")
for index, record in enumerate(loaded_output.values()):
    print(f"Input {index}: \n{record['input']}\n")
    print(f"Output {index}: \n{record['output']}\n")
    print("-" * 80)

Loaded output from pickle file:
Input 0: 
@{
    var total = 0;
    var totalMessage = "";
    @* a multiline
      razor comment embedded in csharp *@
    if (IsPost) {

        // Retrieve the numbers that the user entered.
        var num1 = Request["text1"];
        var num2 = Request["text2"];

        // Convert the entered strings into integers numbers and add.
        total = num1.AsInt() + num2.AsInt();
		<italic><bold>totalMessage = "Total = " + total;</bold></italic>
    }
}

<!DOCTYPE html>
<html lang="en">
  <head>
    <title>Add Numbers</title>
    <meta charset="utf-8" />
  </head>
<body>
  <p>Enter two whole numbers and then click <strong>Add</strong>.</p>
  <form action="" method="post">
    <p><label for="text1">First Number:</label>
      <input type="text" name="text1" />
    </p>
    <p><label for="text2">Second Number:</label>
      <input type="text" name="text2" />
    </p>
    <p><input type="submit" value="Add" /></p>
  </form>

	@* now we call the totalMessag

In [None]:
# After manual verification

# Positions (in loaded_output's iteration order) of the samples to drop.
remove_ind_num = [9, 10, 16, 20, 21]

final_data = {}
for position, (sample_key, record) in enumerate(loaded_output.items()):
    if position in remove_ind_num:
        continue
    print(f"Keeping index {position} in output.")
    final_data[sample_key] = record

Keeping index 0 in output.
Keeping index 1 in output.
Keeping index 2 in output.
Keeping index 3 in output.
Keeping index 4 in output.
Keeping index 5 in output.
Keeping index 6 in output.
Keeping index 7 in output.
Keeping index 8 in output.
Keeping index 11 in output.
Keeping index 12 in output.
Keeping index 13 in output.
Keeping index 14 in output.
Keeping index 15 in output.
Keeping index 17 in output.
Keeping index 18 in output.
Keeping index 19 in output.
Keeping index 22 in output.
Keeping index 23 in output.
Keeping index 24 in output.


In [3]:
# Display the keys that survived the manual filtering step.
final_data.keys()

dict_keys(['csharp_0', 'csharp_1', 'csharp_2', 'csharp_3', 'csharp_4', 'csharp_5', 'csharp_6', 'csharp_7', 'csharp_8', 'csharp_11', 'csharp_12', 'csharp_13', 'csharp_14', 'csharp_15', 'csharp_17', 'csharp_18', 'csharp_19', 'csharp_22', 'csharp_23', 'csharp_24'])

In [6]:
# Save the output to a JSON file
import json  # kept so this cell also works when the top import cell was not run in this kernel

output_json_path = "final_csharp_data.json"
# save_json catches and reports its own I/O and serialization errors, so a
# wrapper try/except here could only hide a missing name (e.g. final_data
# or save_json not defined in this kernel). Let such errors surface.
save_json(final_data, output_json_path)

Data saved successfully to final_csharp_data.json
