In [1]:
import openai
import os
import pandas as pd
import time
import numpy
import matplotlib
from openai.embeddings_utils import cosine_similarity

# Open the API key file for reading.
# NOTE(review): in the cells visible here the handle is never read or closed, and
# openai.api_key is never assigned from it — confirm whether another cell consumes
# `text_file` before relying on authenticated calls.
text_file = open("API_key.txt", "r")

def get_embedding(task):
    """Return the embedding vector for ``task``.

    Sleeps for two seconds before every request, then asks
    ``openai.Embedding.create`` to embed ``task`` with the
    ``text-embedding-ada-002`` model.

    Args:
        task: Value passed through unchanged as the request's ``input``.

    Returns:
        The ``embedding`` entry of the first item in the response's ``data`` list.
    """
    time.sleep(2)
    request = {"input": task, "model": "text-embedding-ada-002"}
    result = openai.Embedding.create(**request)
    first_item = result['data'][0]
    return first_item['embedding']

def search_functions(df, code_query):
    """Rank the rows of ``df`` by similarity to ``code_query``.

    Embeds ``code_query`` with ``get_embedding``, scores every value of
    ``df.code_embedding`` against it with ``cosine_similarity`` and stores the
    scores in a ``similarities`` column (``df`` is modified in place).

    Args:
        df: Frame with a ``code_embedding`` column.
        code_query: Query handed to ``get_embedding``.

    Returns:
        The (at most) 100 rows with the highest ``similarities``, best first.
    """
    query_vector = get_embedding(code_query)

    def score(row_vector):
        # Same argument order as before: row embedding first, query second.
        return cosine_similarity(row_vector, query_vector)

    df['similarities'] = df.code_embedding.apply(score)
    ranked = df.sort_values('similarities', ascending=False)
    return ranked.head(100)

def split_file(filename,blocks):
    """Split a text file into blocks at blank-line boundaries.

    A new block starts at line ``i`` when that line does not begin with a space
    and the previous line contains nothing but whitespace plus its newline.
    Each block is appended to ``blocks`` as
    ``[filename, start_line, end_line, text]`` where the line numbers are
    0-based and ``end_line`` is exclusive.

    Args:
        filename: Path of the file to read.
        blocks: List that receives the block records; it is mutated in place.

    Returns:
        The same ``blocks`` list, for convenience.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()

    start_line = 0
    # A single space strips down to "", so the first line never counts as
    # following a blank line.
    prev_line = " "
    for i, line in enumerate(lines):
        # Length 1 after removing spaces/tabs means only the newline is left.
        prev_is_blank = len(prev_line.replace(" ", "").replace("\t", "")) == 1
        if line[0] != " " and prev_is_blank:
            extracted_text = "".join(lines[start_line:i])
            if extracted_text != "":
                blocks.append([filename, start_line, i, extracted_text])
            start_line = i
        prev_line = line

    # Trailing block: everything from the last split point to the end of file.
    # Slicing from start_line (0 when no split was found) fixes the earlier
    # behaviour of slicing from the -2 sentinel, which kept only the last two
    # lines of a file that had no split point.
    blocks.append([filename, start_line, len(lines), "".join(lines[start_line:])])
    return blocks

import shutil

def create_clone(path):
    """Recreate the ``AIFiles`` folder as a copy of everything in ``path``.

    Any existing ``AIFiles`` folder in the current working directory is removed
    first. Regular files are copied with ``shutil.copy``; sub-directories are
    copied recursively with ``shutil.copytree`` (previously a sub-directory made
    ``shutil.copy`` raise and aborted the clone).

    Args:
        path: Directory whose entries are copied.
    """
    clone_dir = "AIFiles"
    # Remove folder if it exists
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)
    # Create folder
    os.mkdir(clone_dir)
    clone_root = os.path.abspath(clone_dir)
    # Copy everything in path to AIFiles
    for filename in os.listdir(path):
        src = os.path.join(path, filename)
        if os.path.abspath(src) == clone_root:
            # `path` is the working directory: never copy the clone into itself.
            continue
        dst = os.path.join(clone_dir, filename)
        if os.path.isdir(src):
            shutil.copytree(src, dst)
        else:
            shutil.copy(src, dst)

def train_AI(path):
    """Clone ``path`` and write an index of its files to ``df4.csv``.

    Steps:
      1. ``create_clone(path)`` refreshes the ``AIFiles`` copy.
      2. ``path/.AIIgnore`` is read; each line is a file or directory *name*
         to skip (names are compared exactly, not as patterns).
      3. The tree under ``path`` is walked and every non-ignored file path is
         recorded.
      4. ``df4.csv`` is written with the columns ``filepath``, ``last_sync``
         (the current time) and ``last_updated`` (each file's mtime).

    Args:
        path: Root directory of the repository to index.

    Raises:
        FileNotFoundError: If ``path/.AIIgnore`` does not exist.
    """
    print("Training AI")
    create_clone(path)
    file_paths_details = []
    # The context manager closes the ignore file; a bare open() leaked the handle.
    with open(path+"/.AIIgnore", "r") as ignore_file:
        Files_to_ignore = ignore_file.read().splitlines()
    print("Files and directories to ignore:")
    print(Files_to_ignore)

    for root, directories, files in os.walk(path):
        # Exclude any directories that appear in the ignore list.
        # Assigning to the slice prunes os.walk's traversal in place.
        directories[:] = [d for d in directories if d not in Files_to_ignore]
        print("Directories:", directories)
        for filename in files:
            if filename not in Files_to_ignore:
                print(filename)
                # Append the path to each file to the file_paths list
                file_paths_details.append(os.path.join(root, filename))

    # Naming the column at construction also works when no files were found;
    # assigning `.columns` afterwards raised a length mismatch on an empty frame.
    df4 = pd.DataFrame(file_paths_details, columns=["filepath"])
    # Time of this sync, identical for every row
    df4['last_sync'] = time.time()
    # Last modification time of each file
    df4['last_updated'] = df4.filepath.apply(lambda x: os.path.getmtime(x))
    df4.to_csv("df4.csv", index=False)


In [2]:
# Index the repository checkout; train_AI refreshes the AIFiles clone and writes df4.csv.
# NOTE(review): the path is an absolute, machine-specific location.
train_AI('/Users/prathameshsutone/Desktop/PR_Raiser/repo')

Training AI
Files and directories to ignore:
['node_modules']
Directories: []
index.html
styles.css
dummy_1.txt
login-page.js
.AIIgnore


In [56]:
def _parse_line_range(spec):
    """Turn a diff line spec ("7" or "3,5") into ``(start, end)``; None if malformed."""
    start, comma, end = spec.partition(",")
    if not start.isdecimal() or (comma and not end.isdecimal()):
        return None
    first = int(start)
    return (first, int(end) if comma else first)


def _parse_command_header(header):
    """Split a header such as "3,5c3,4" into ``(letter, old_range, new_range)``; None otherwise."""
    for letter in "acd":
        old_spec, found, new_spec = header.partition(letter)
        if not found:
            continue
        old_range = _parse_line_range(old_spec)
        new_range = _parse_line_range(new_spec)
        if old_range is None or new_range is None:
            return None
        return letter, old_range, new_range
    return None


def parse_patch(patch_string):
    """Parse the output of plain ``diff old new`` (normal format) into commands.

    Lines starting with a backslash (e.g. "\\ No newline at end of file") are
    discarded first. Every hunk header then yields one tuple:

    * ``("add", after_line, (new_start, new_end), new_lines)``
    * ``("change", (old_start, old_end), (new_start, new_end), old_lines, new_lines)``
    * ``("del", (old_start, old_end))``

    Line numbers are the 1-based, inclusive numbers printed by diff; a header
    that names a single line ("7c7", "9d8", "5a6") is normalised to a
    two-element range ``(n, n)``. Content lines are returned without their
    two-character "< " / "> " prefix.

    Compared with the previous implementation this version
      * no longer reuses the scan index inside the "add" branch, which could
        send the scan backwards and loop forever;
      * accepts single-line add and delete headers instead of raising
        IndexError;
      * strips the same two-character prefix from added lines as from changed
        lines (added lines used to keep a leading space);
      * skips lines that are not valid hunk headers instead of re-appending a
        stale command or failing on an unbound variable.

    Args:
        patch_string: Text produced by ``diff``; may be empty.

    Returns:
        List of command tuples in the order the hunks appear.
    """
    lines = [line for line in patch_string.split("\n") if not line.startswith("\\")]
    commands = []
    i = 0
    while i < len(lines):
        parsed = _parse_command_header(lines[i])
        if parsed is None:
            # Blank trailing line, stray content line or unrecognised text.
            i += 1
            continue

        letter, old_range, new_range = parsed
        old_count = old_range[1] - old_range[0] + 1
        new_count = new_range[1] - new_range[0] + 1
        body = i + 1  # first content line of this hunk

        if letter == "a":
            new_lines = [line[2:] for line in lines[body:body + new_count]]
            commands.append(("add", old_range[0], new_range, new_lines))
            i = body + new_count
        elif letter == "c":
            old_lines = [line[2:] for line in lines[body:body + old_count]]
            # The "---" separator sits between the old and the new lines.
            separator = body + old_count
            new_lines = [line[2:] for line in lines[separator + 1:separator + 1 + new_count]]
            commands.append(("change", old_range, new_range, old_lines, new_lines))
            i = separator + 1 + new_count
        else:
            commands.append(("del", old_range))
            i = body + old_count
    return commands

In [4]:
def get_clone_path(path):
    """Map a path inside the repository to the matching path inside the clone.

    The path is split on '/', the first component equal to "repo" is replaced
    by "AIFiles", and the components are joined back with '/'.

    Args:
        path: '/'-separated path containing a "repo" component.

    Returns:
        The rewritten path.

    Raises:
        ValueError: If no component of ``path`` equals "repo".
    """
    segments = path.split('/')
    # list.index raises ValueError when "repo" is absent.
    marker = segments.index("repo")
    segments[marker] = "AIFiles"
    return "/".join(segments)

In [5]:
import subprocess

def get_diff(old_file_path, new_file_path):
    """Run the external ``diff`` program on two files and return its stdout as text.

    The exit status is not inspected and stderr is captured but discarded, so
    identical files yield an empty string.

    Args:
        old_file_path: First argument handed to ``diff``.
        new_file_path: Second argument handed to ``diff``.

    Returns:
        Whatever ``diff`` wrote to standard output.
    """
    completed = subprocess.run(
        ["diff", old_file_path, new_file_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return completed.stdout

In [66]:
import json
import os
import pandas as pd

def read_info():
    """Return the ``home_path`` entry stored in ``info.json``.

    ``info.json`` is looked up relative to the current working directory and
    must contain a JSON object with a ``home_path`` key.
    """
    with open('info.json') as handle:
        settings = json.loads(handle.read())
    return settings['home_path']
    
def sync_AI():
    """Bring ``df.csv``, ``df2.csv`` and ``df4.csv`` in line with the repository on disk.

    Steps:
      1. Load the three CSV files and the repository root (``read_info()``).
      2. Walk the repository, skipping every file or directory whose name is
         listed in ``<root>/.AIIgnore``.
      3. Append a ``df4`` row for each file that is on disk but not indexed.
      4. Drop the rows of files that are indexed but no longer on disk from
         all three frames.
      5. For each ``df4`` row whose ``last_sync`` is greater than its
         ``last_updated``, diff the clone copy against the live file and hand
         the diff to ``apply_patch``.
      6. Renumber ``LineNumber`` per file and write the three CSV files back.

    NOTE(review): step 5 compares the two values stored in df4.csv; neither is
    refreshed here, and rows added in step 3 have no timestamps, so they never
    compare as greater — confirm this is the intended "needs update" test.
    NOTE(review): ``apply_patch`` is not passed the local ``df`` loaded here, so
    the ``df`` written in step 6 may not contain the patched rows — confirm.

    Raises:
        FileNotFoundError: If a CSV file, ``info.json`` or ``.AIIgnore`` is missing.
    """
    df4 = pd.read_csv('df4.csv')
    df2 = pd.read_csv('df2.csv')
    df = pd.read_csv('df.csv')

    path = read_info()
    print("Syncing AI :")
    file_paths_details = []
    # The context manager closes the ignore file; a bare open() leaked the handle.
    with open(path+"/.AIIgnore", "r") as ignore_file:
        Files_to_ignore = ignore_file.read().splitlines()

    for root, directories, files in os.walk(path):
        # Exclude any directories that appear in the ignore list.
        # Assigning to the slice prunes os.walk's traversal in place.
        directories[:] = [d for d in directories if d not in Files_to_ignore]
        for filename in files:
            if filename not in Files_to_ignore:
                # Append the path to each file to the file_paths list
                file_paths_details.append(os.path.join(root, filename))

    # Files present on disk but missing from the index
    new_file_paths = set(file_paths_details) - set(df4["filepath"])
    new_rows = []
    for file_path in new_file_paths:
        # Only the path is known for a new file; the other columns stay empty.
        new_rows.append({"filepath": file_path})
        print("New File : "+file_path)

    new_df = pd.DataFrame(new_rows)
    df4 = pd.concat([df4, new_df], ignore_index=True)

    # Files present in the index but no longer on disk
    del_file_paths = set(df4["filepath"]) - set(file_paths_details)
    for file_path in del_file_paths:
        df4 = df4[df4["filepath"] != file_path]
        df2 = df2[df2["filepath"] != file_path]
        df = df[df["filepath"] != file_path]
        print("Deleted File : "+file_path)

    for ind in df4.index:
        if(df4['last_sync'][ind]>df4['last_updated'][ind]):
            print("Updating : "+df4['filepath'][ind])
            diff= get_diff(get_clone_path(df4['filepath'][ind]),df4['filepath'][ind])
            apply_patch(df4['filepath'][ind],diff)
        print(" ")

    # Renumber lines from 0 within each file
    df['LineNumber'] = df.groupby('filepath').cumcount()

    df4.to_csv("df4.csv", index=False)
    df2.to_csv("df2.csv", index=False)
    df.to_csv("df.csv", index=False)

In [67]:
# Module-level frame that add2df, delfromdf and changedf rebind through `global df`.
df = pd.read_csv('df.csv')

def add2df(filename, location, new_rows):
    """Insert new rows into the global ``df`` after its first ``location`` rows.

    One row is built per entry of ``new_rows`` with ``Code`` set to the entry,
    ``LineNumber`` 0, ``code_embedding`` None and ``filepath`` set to
    ``filename``. ``location`` is a positional offset into the whole frame.
    The global ``df`` is rebound to the result and the rows belonging to
    ``filename`` are displayed.
    """
    global df
    inserted = pd.DataFrame(new_rows, columns=['Code']).assign(
        LineNumber=0,
        code_embedding=None,
        filepath=filename,
    )
    head = df.iloc[:location]
    tail = df.iloc[location:]
    df = pd.concat([head, inserted, tail])

    # Show the rows of the affected file
    display(df[df['filepath'] == filename])
    
    
def delfromdf(filename, start, end):
    """Delete the rows for lines ``start``..``end`` from the global ``df``.

    ``start`` and ``end`` are the 1-based, inclusive line numbers of a diff
    "d" command, so the rows at 0-based positions ``start-1`` .. ``end-1`` are
    removed. The previous implementation dropped positions ``start`` .. ``end``
    (one row too low) and dropped by index *label*, which removes unrelated
    rows once the index contains duplicate labels.

    NOTE(review): positions are counted from the top of the whole frame, not
    from the first row of ``filename`` — confirm ``df`` holds a single file or
    that this file's rows come first.

    Args:
        filename: File whose rows are displayed after the deletion.
        start: First line to delete (1-based).
        end: Last line to delete (1-based, inclusive).
    """
    global df
    first = max(start - 1, 0)
    # Positional slicing keeps every remaining row's index label untouched.
    df = pd.concat([df.iloc[:first], df.iloc[end:]])
    display(df[df['filepath']==filename])

    return

def changedf(filename, old_start, old_end, new_start, new_end, new_rows):
    """Replace lines ``old_start``..``old_end`` of the global ``df`` with ``new_rows``.

    ``old_start`` and ``old_end`` are the 1-based, inclusive line numbers of a
    diff "c" command, so the rows at 0-based positions ``old_start-1`` ..
    ``old_end-1`` are removed and the new rows take their place. The previous
    drop/concat arithmetic was off by one: a single-line change removed
    nothing, and a multi-line change kept the first old line while deleting
    one line past the range.

    NOTE(review): positions are counted from the top of the whole frame, not
    from the first row of ``filename`` — confirm ``df`` holds a single file or
    that this file's rows come first.

    Args:
        filename: Value stored in the ``filepath`` column of the new rows.
        old_start: First line to replace (1-based).
        old_end: Last line to replace (1-based, inclusive).
        new_start: Unused; accepted for interface compatibility.
        new_end: Unused; accepted for interface compatibility.
        new_rows: Replacement values for the ``Code`` column, one per row.
    """
    global df
    new_df = pd.DataFrame(new_rows,columns=['Code'])
    new_df['LineNumber'] = 0
    new_df['code_embedding'] = None
    new_df['filepath'] = filename

    first = max(old_start - 1, 0)
    # Keep everything before the old range, then the new rows, then everything after it.
    df = pd.concat([df.iloc[:first], new_df, df.iloc[old_end:]])

    display(df[df['filepath']==filename])
    return
    
def apply_patch(filename,patch):
    """Apply a normal-format diff to the rows tracked for ``filename``.

    The diff text is parsed with ``parse_patch`` and each resulting command is
    dispatched to ``changedf``, ``add2df`` or ``delfromdf``.

    Every command carries line numbers of the *old* file. Commands are
    therefore applied from the last hunk to the first: rows above a hunk are
    untouched when it is applied, so its line numbers are still valid. Applying
    them top-down (the previous behaviour) shifted every hunk that followed an
    addition or deletion.

    Args:
        filename: File the patch belongs to; forwarded to the row helpers.
        patch: Text produced by ``diff old new``.
    """
    commands = parse_patch(patch)
    print("Applying Patch for :" + filename)
    for command in reversed(commands):
        if command[0]=='change':
            changedf(filename,command[1][0],command[1][1],command[2][0],command[2][1],command[4])
            print("Change")
            print(command)
        elif command[0]=='add':
            print("Adding")
            add2df(filename,command[1],command[3])
        elif (command[0]=='del'):
            print("Delete")
            delfromdf(filename,command[1][0],command[1][1])
    return

In [65]:
import pandas as pd

# Load the three persisted CSV tables into notebook globals, then run one sync pass.
# NOTE(review): sync_AI() reads the same three CSV files into its own local variables,
# so df2 and df4 loaded here are not what it operates on — confirm these reloads are needed.
df = pd.read_csv('df.csv')
df2 = pd.read_csv('df2.csv')
df4 = pd.read_csv('df4.csv')

sync_AI()

Syncing AI :
Updating : /Users/prathameshsutone/Desktop/PR_Raiser/repo/index.html
Applying Patch for :/Users/prathameshsutone/Desktop/PR_Raiser/repo/index.html
Delete


Unnamed: 0,filepath,Code,LineNumber,code_embedding
0,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,<!DOCTYPE html>\n,0,"[-0.01256885752081871, 0.0028874874114990234, ..."
1,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,"<html lang=""en"">\n",1,"[-0.008912181481719017, 0.010274510830640793, ..."
2,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,\n,2,"[0.0025078936014324427, -0.018215738236904144,..."
3,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,<!- File References ->\n,3,"[-0.025017328560352325, -0.0020156055688858032..."
4,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,\n,4,"[0.0025078936014324427, -0.018215738236904144,..."
5,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,<head>\n,5,"[-0.019115811213850975, 0.011791320517659187, ..."
6,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,"<meta charset=""UTF-8"">\n",6,"[-0.016457363963127136, 0.0068514239974319935,..."
7,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,"<meta name=""viewport"" content=""width=device-...",7,"[-0.005645757541060448, 0.0023967840243130922,..."
8,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,<title>Login</title>\n,8,"[-0.001449250034056604, 0.011215200647711754, ..."
9,/Users/prathameshsutone/Desktop/PR_Raiser/repo...,"<link rel=""stylesheet"" href=""style.css"">\n",9,"[-0.010547470301389694, 0.023108486086130142, ..."


 
Updating : /Users/prathameshsutone/Desktop/PR_Raiser/repo/styles.css
Applying Patch for :/Users/prathameshsutone/Desktop/PR_Raiser/repo/styles.css
 
Updating : /Users/prathameshsutone/Desktop/PR_Raiser/repo/dummy_1.txt
Applying Patch for :/Users/prathameshsutone/Desktop/PR_Raiser/repo/dummy_1.txt
 
Updating : /Users/prathameshsutone/Desktop/PR_Raiser/repo/login-page.js
Applying Patch for :/Users/prathameshsutone/Desktop/PR_Raiser/repo/login-page.js
 
Updating : /Users/prathameshsutone/Desktop/PR_Raiser/repo/.AIIgnore
Applying Patch for :/Users/prathameshsutone/Desktop/PR_Raiser/repo/.AIIgnore
 
