In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Path to the folder where you saved the model
save_folder = "./saved_model_codeT5_loaded"

# Load tokenizer and model from local folder
tokenizer = AutoTokenizer.from_pretrained(save_folder)
model = AutoModelForSequenceClassification.from_pretrained(save_folder)

# Set model to evaluation mode
model.eval()

print("Model and tokenizer loaded successfully!")

Model and tokenizer loaded successfully!


In [2]:
id2label = {0:"O(1)", 1:"O(n^3)", 2:"O(n)", 3:"O(2^n)", 4:"O(n^2)"}

In [3]:
!pip install transformers datasets torch google-generativeai python-dotenv



In [None]:
import torch
import google.generativeai as genai

# Directly pass your Gemini 2.5 Pro API key
import os

api_key = os.environ.get("GENIE_API_KEY")
genai.configure(api_key=api_key)

# Load the Gemini 2.5 Pro model
gemini_model = genai.GenerativeModel("gemini-2.5-pro")

In [11]:
import json

t5_model_path = "./saved_model_codeT5_loaded"
t5_tokenizer = AutoTokenizer.from_pretrained(t5_model_path)
t5_model = AutoModelForSequenceClassification.from_pretrained(t5_model_path)
t5_model.eval()

T5ForSequenceClassification(
  (transformer): T5Model(
    (shared): Embedding(32100, 512)
    (encoder): T5Stack(
      (embed_tokens): Embedding(32100, 512)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=512, out_features=512, bias=False)
                (k): Linear(in_features=512, out_features=512, bias=False)
                (v): Linear(in_features=512, out_features=512, bias=False)
                (o): Linear(in_features=512, out_features=512, bias=False)
                (relative_attention_bias): Embedding(32, 8)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=512, out_features=2048, bias=False)
                (wo): Linear(in_featu

In [12]:
def predict_tc(code_snippet):
    inputs = t5_tokenizer(code_snippet, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = t5_model(**inputs)
    logits = outputs.logits
    pred_id = torch.argmax(logits, dim=-1).item()
    return id2label[pred_id]

In [13]:
import re

def analyze_with_gemini(code_snippet):
    prompt = f"""
Analyze this Python code and return three things:

1. Time Complexity (TC) in Big-O notation.
2. Space Complexity (SC) in Big-O notation.
3. One short suggestion to improve the code (if any).

Format EXACTLY like this:
TC: <time complexity>
SC: <space complexity>
Suggestion: <short suggestion>

{code_snippet}
"""
    try:
        response = gemini_model.generate_content(prompt)
        text = response.text.strip()

        # Extract TC, SC, Suggestion
        tc_match = re.search(r"TC:\s*(.*)", text)
        sc_match = re.search(r"SC:\s*(.*)", text)
        sg_match = re.search(r"Suggestion:\s*(.*)", text)

        result = {
            "tc": tc_match.group(1) if tc_match else "Unknown",
            "sc": sc_match.group(1) if sc_match else "Unknown",
            "suggestion": sg_match.group(1) if sg_match else "No suggestions"
        }

    except Exception as e:
        result = {"tc": "Unknown", "sc": "Unknown", "suggestion": "Analysis failed"}
    return result


In [8]:
def hybrid_analysis(code_snippet):
    # Step 1: T5 predicts TC
    tc_model = predict_tc(code_snippet)

    # Step 2: Gemini predicts TC & SC + Suggestion
    gemini_output = analyze_with_gemini(code_snippet)
    tc_gemini = gemini_output["tc"]
    sc_gemini = gemini_output["sc"]
    suggestion = gemini_output["suggestion"]

    # Step 3: Check mismatch
    mismatch_flag = (tc_model != tc_gemini)

    # Step 4: If mismatch, use Gemini's TC
    final_tc = tc_gemini if mismatch_flag else tc_model

    result = {
        "code": code_snippet,
        "tc_final": final_tc,
        "sc_gemini": sc_gemini,
        "suggestion": suggestion
    }

    return result

In [14]:
code_example = """
    def binary_search(arr, target):
    low, high = 0, len(arr)-1
    while low <= high:
        mid = (low + high) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            low = mid + 1
        else:
            high = mid - 1
    return -1
"""

result = hybrid_analysis(code_example)
print(result)

{'code': '\n    def binary_search(arr, target):\n    low, high = 0, len(arr)-1\n    while low <= high:\n        mid = (low + high) // 2\n        if arr[mid] == target:\n            return mid\n        elif arr[mid] < target:\n            low = mid + 1\n        else:\n            high = mid - 1\n    return -1\n', 'tc_final': 'O(log n)', 'sc_gemini': 'O(1)', 'suggestion': 'Calculate mid as `low + (high - low) // 2` to prevent potential integer overflow on very large arrays.'}
