In [1]:
from ollama import chat
from ollama import ChatResponse
import torch
import json
import os

In [2]:
def load_json(filepath):
    """Read and decode a JSON document from disk.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or ``None`` when the file does not exist or
        its content is not valid JSON. In both failure cases an error
        message is printed instead of raising.
    """
    try:
        # JSON text is UTF-8; without an explicit encoding open() falls back
        # to the platform locale and may mis-decode non-ASCII source code.
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return data
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {filepath}")
        return None

In [1]:
def save_json(data, filepath):
    """Write ``data`` to ``filepath`` as JSON indented by four spaces.

    Args:
        data: Any JSON-serializable object.
        filepath: Destination path; an existing file is overwritten.

    Returns:
        None. Success or failure is reported with a printed message; errors
        are caught rather than propagated.
    """
    try:
        # Serialize before opening the file: mode 'w' truncates on open, so a
        # serialization error would otherwise wipe out the previous content
        # and leave a partially written document behind.
        payload = json.dumps(data, indent=4)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"Data saved successfully to {filepath}")
    except Exception as e:
        print(f"Error saving data to {filepath}: {e}")

In [4]:
# Registry of system prompts, keyed by programming-language name (e.g. "Go").
system_prompt_dict = {}

In [5]:
# System prompt for Go samples: instructs the model to split a Go source file
# into typed components and to reply with a single JSON object only.
# NOTE(review): instruction 3 asks for `NULL`, but the JSON literal is `null`;
# confirm the model's replies still parse when a component has no name.
system_prompt_dict["Go"] = """
You are an expert Go code analyst. Your task is to dissect a given Go code snippet and provide a structured JSON response detailing all of its components and overall functionality. You must strictly adhere to the following JSON format:
{
  "output": {
    "programming_language": "Go",
    "components": [
      {
        "component_type": "<TYPE_OF_GO_COMPONENT>",
        "component_name": "<NAME_OF_COMPONENT_IF_APPLICABLE>",
        "component_code": "<THE_ACTUAL_GO_CODE_OF_THE_COMPONENT>",
        "component_description": "<DETAILED_DESCRIPTION_OF_COMPONENT_FUNCTIONALITY>"
      },
      { /* ... more components ... */ }
    ],
    "overall_description": "<DETAILED_SUMMARY/DESCRIPTION_OF_THE_ENTIRE_GO_CODE_FUNCTIONALITY>"
  }
}

**Instructions:**
1. Identify the Programming Language: The script is written in Go.
2. Component Types: Use appropriate component types, for example, PACKAGE_DECLARATION, IMPORT_STATEMENT, FUNCTION_DEFINITION, STRUCT_DEFINITION, METHOD_DEFINITION, INTERFACE_DEFINITION, VARIABLE_DECLARATION, CONDITIONAL_STATEMENT, LOOP, ERROR_HANDLING, GOROUTINE.
3. Component Names: Provide the correct identifier for each component (function name, variable name, struct name, etc.), or NULL if not applicable.
4. Component Code: Include the complete, unmodified Go code for each component.
5. Component Descriptions: Provide a detailed, technical explanation of what each component does, including concurrency handling, data structures, error management, and function interactions.
6. Overall Description: Provide a detailed summary of the entire Go code, explaining its purpose, architecture, and how components interact.
7. Strict JSON Output: Your ENTIRE response must be ONLY the valid JSON object. Do not include any explanations, introductions, or additional text outside the JSON structure.

Analyze the following Go code properly and return ONLY the JSON response with no additional text:
"""

In [6]:
def string_to_json(input_string):
    """Try to decode a JSON string without raising on malformed input.

    Args:
        input_string: Text expected to contain a JSON document.

    Returns:
        A ``(value, error)`` pair: ``(decoded, None)`` when parsing succeeds,
        or ``(None, message)`` where ``message`` is the decoder's error text.
    """
    try:
        parsed = json.loads(input_string)
    except json.JSONDecodeError as decode_error:
        return None, str(decode_error)
    return parsed, None

In [7]:
def make_data(data, system_prompt, model_name="qwen2.5-coder:32b"):
    """Query the chat model once per sample and collect its replies.

    Args:
        data: Mapping of sample key to the text sent as the user message.
        system_prompt: Text sent as the system message with every request.
        model_name: Name of the model passed to ``chat``. Defaults to the
            model this notebook was originally run with.

    Returns:
        A dict with the same keys as ``data``; each value is
        ``{"input": <user text>, "output": <raw reply content>}``. The reply
        is stored as returned, without any JSON validation.
    """
    output_data = {}

    for key, value in data.items():
        response: ChatResponse = chat(model=model_name, messages=[
            {
                'role': 'system',
                'content': system_prompt,
            },
            {
                'role': 'user',
                'content': value,
            }
        ])

        output_data[key] = {
            "input": value,
            "output": response['message']['content'],
        }

        # Progress marker: one line per completed request.
        print("Processed:", key)

    return output_data

In [8]:
# Load the Go samples and keep only the first `num_keys` entries, in the
# order the file lists them.
data = load_json("go_train.json")
num_keys = 25

# load_json returns None on failure, so this also catches a missing/bad file.
if not isinstance(data, dict):
    raise TypeError("Input must be a dictionary.")
if not (isinstance(num_keys, int) and num_keys >= 0):
    raise ValueError("num_keys must be a non-negative integer.")

sliced_dict = {}
for key, value in data.items():
    # Stop as soon as the subset has reached the requested size.
    if len(sliced_dict) >= num_keys:
        break
    sliced_dict[key] = value

# The full dataset is no longer needed once the subset has been taken.
del data

In [9]:
# Sanity check: report how many samples were selected.
print("Length of sliced dictionary:", len(sliced_dict))

Length of sliced dictionary: 25


In [10]:
# Send every selected sample to the model with the Go system prompt;
# make_data issues one chat request per entry and prints a line after each.
output = make_data(sliced_dict, system_prompt_dict["Go"])

Processed: go_0
Processed: go_1
Processed: go_2
Processed: go_3
Processed: go_4
Processed: go_5
Processed: go_6
Processed: go_7
Processed: go_8
Processed: go_9
Processed: go_10
Processed: go_11
Processed: go_12
Processed: go_13
Processed: go_14
Processed: go_15
Processed: go_16
Processed: go_17
Processed: go_18
Processed: go_19
Processed: go_20
Processed: go_21
Processed: go_22
Processed: go_23
Processed: go_24


In [11]:
# Print every input/reply pair, separated by a rule, for review by eye.
for sample in output.values():
    source_code, model_reply = sample['input'], sample['output']
    print(f"Input: \n{source_code}\n")
    print(f"Output: \n{model_reply}\n")
    print("-" * 80)

Input: 
/*
Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tensorflow

// #include <stdlib.h>
// #include "tensorflow/c/c_api.h"
import "C"
import (
	"fmt"
	"unsafe"
)

// makeCShape converts a shape specified in C.int64_t into a Shape.
func makeCShape(shape []C.int64_t) Shape {
	s := Shape{dims: make([]int64, len(shape))}
	for i, n := range shape {
		s.dims[i] = int64(n)
	}
	return s
}

// Attr returns the value of an attribute on op. It returns 

In [12]:
import pickle

# Persist the raw model replies to disk; a later cell reloads this file.
with open("data.pickle", "wb") as pickle_file:
    pickle.dump(output, pickle_file)

In [2]:
import pickle

# Reload the saved replies from disk.
# Caution: unpickling can execute arbitrary code, so only load a
# data.pickle that this notebook wrote itself.
with open("data.pickle", "rb") as pickle_file:
    loaded_output = pickle.load(pickle_file)

print("Loaded output from pickle file:")
for index, sample in enumerate(loaded_output.values()):
    # The index shown here is the position used by the manual filter below.
    print(f"Input {index}: \n{sample['input']}\n")
    print(f"Output {index}: \n{sample['output']}\n")
    print("-" * 80)

Loaded output from pickle file:
Input 0: 
/*
Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tensorflow

// #include <stdlib.h>
// #include "tensorflow/c/c_api.h"
import "C"
import (
	"fmt"
	"unsafe"
)

// makeCShape converts a shape specified in C.int64_t into a Shape.
func makeCShape(shape []C.int64_t) Shape {
	s := Shape{dims: make([]int64, len(shape))}
	for i, n := range shape {
		s.dims[i] = int64(n)
	}
	return s
}

// Attr returns the value 

In [4]:
# Manual repair: append a newline and a closing brace to the stored reply
# for sample go_18.
# NOTE(review): this mutates loaded_output in place and is not idempotent --
# every re-run of this cell appends another "\n}". Reload the pickle before
# running it again.
loaded_output["go_18"]["output"] = loaded_output["go_18"]["output"] + "\n}"

In [5]:
# After manual verification: drop the samples at these positions (positions
# match the "Input N" / "Output N" numbering printed when the pickle was
# loaded) and keep everything else in its original order.
remove_ind_num = [0, 2, 4, 17, 20]

final_data = {}
for position, sample_key in enumerate(loaded_output):
    if position in remove_ind_num:
        continue
    print(f"Keeping index {position} in output.")
    final_data[sample_key] = loaded_output[sample_key]

Keeping index 1 in output.
Keeping index 3 in output.
Keeping index 5 in output.
Keeping index 6 in output.
Keeping index 7 in output.
Keeping index 8 in output.
Keeping index 9 in output.
Keeping index 10 in output.
Keeping index 11 in output.
Keeping index 12 in output.
Keeping index 13 in output.
Keeping index 14 in output.
Keeping index 15 in output.
Keeping index 16 in output.
Keeping index 18 in output.
Keeping index 19 in output.
Keeping index 21 in output.
Keeping index 22 in output.
Keeping index 23 in output.
Keeping index 24 in output.


In [6]:
# Display the keys that survived the manual filter.
final_data.keys()

dict_keys(['go_1', 'go_3', 'go_5', 'go_6', 'go_7', 'go_8', 'go_9', 'go_10', 'go_11', 'go_12', 'go_13', 'go_14', 'go_15', 'go_16', 'go_18', 'go_19', 'go_21', 'go_22', 'go_23', 'go_24'])

In [8]:
# Save the filtered samples to a JSON file.
# json is imported here because save_json uses it.
import json
output_json_path = "final_go_data.json"
try:
    save_json(final_data, output_json_path)
except Exception as e:
    # NOTE(review): save_json already catches and prints its own errors, so
    # this handler mainly covers failures to call it at all -- confirm it is
    # still wanted.
    print(f"Error saving final data to JSON: {e}")

Data saved successfully to final_go_data.json
