In [6]:
from transformers import RobertaTokenizer, T5ForConditionalGeneration
import torch

# setting a seed for result reproduceability
torch.manual_seed(0)

tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base')
model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base')

text = "def greet(user): print(f'hello <extra_id_0>!')"
input_ids = tokenizer(text, return_tensors="pt").input_ids

In [2]:
import utils

sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=50, 
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)


Output:
----------------------------------------------------------------------------------------------------
0: {user.username} {user.password} {user.email} {user.password} hello\n\nHello,
1: world! {user.name} {user.email} {user.password} {user.password}
2: {user.username} @ {user.email} hello{user.email}


In [3]:
model = T5ForConditionalGeneration.from_pretrained('savedmodels/codet5_condode_dir')

In [4]:
sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=50, 
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)
# lmao this is worse, TODO 

Output:
----------------------------------------------------------------------------------------------------
0: void function ( String arg0 ) { System. out. println ( "srini_string" + arg0 ) ; }
1: void function ( String arg0 ) { System. out. println ( arg0. replaceAll ( "srini_string", "srini_string" ) ) ; System. out. println ( "srini_string" )
2: void function ( String arg0 ) { System. out. print ( "srini_string" ) ; System. out. println ( arg0 ) ; }


In [5]:
text = "{\"code\": \"void function ( ScriptOrFnNode arg0 ) { int loc0 = - 1 ; collectFuncNodes ( arg0 , loc0 , null ) ; }\", \"nl\": \"generate mappings for each function node and parameters and variables names associated with it . concode_field_sep int parentScope concode_elem_sep ArrayList functionBracePositions concode_elem_sep ObjArray funcObjects concode_elem_sep int functionNum concode_elem_sep ArrayList functionVarMappings concode_elem_sep int lastTokenCount concode_elem_sep ArrayList replacedTokens concode_field_sep boolean isInScopeChain concode_elem_sep void reset concode_elem_sep void leaveNestingLevel concode_elem_sep String getMappedToken concode_elem_sep String getPreviousTokenMapping concode_elem_sep void collectFuncNodes concode_elem_sep int sourceCompress concode_elem_sep void enterNestingLevel\"}"
input_ids = tokenizer(text, return_tensors="pt").input_ids
sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=50, 
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)
# lmao this is worse, TODO 

Output:
----------------------------------------------------------------------------------------------------
0: ArrayList function ( int arg0, int arg1, HashMap arg2 ) { ArrayList loc0 = new ArrayList ( ) ; if ( arg0 > 0 ) { for ( int loc1 = 0 ; loc1 < arg0 ; loc
1: ArrayList function ( ) { ArrayList loc0 = new ArrayList ( ) ; if ( arg0!= null ) { for ( int loc1 = 0 ; loc1 < functionNum ; loc1 ++ ) { loc0. add ( arg0
2: ArrayList function ( ) { ArrayList loc0 = new ArrayList ( ) ; for ( int loc1 = 0 ; loc1 < functionNum ; loc1 ++ ) { loc0. add ( arg0. getFunctionName ( loc1 ) )


In [1]:
from transformers import RobertaTokenizer, T5ForConditionalGeneration, T5Config
import torch

# setting a seed for result reproduceability
torch.manual_seed(0)

config = T5Config.from_pretrained('Salesforce/codet5-small')
tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-small')

# model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small')
model = T5ForConditionalGeneration.from_pretrained('../../CodeT5/CodeT5/sh/saved_models/codecontest/codet5_small_100000_lr10_bs2_src320_trg150_pat3_e30/checkpoint-last')

In [14]:
text = "RATING: 800 \n TAGS: implementation, math \n LANGUAGE IS python \n CORRECT SOLUTION \n Polycarp likes squares and cubes of positive integers. Here is the beginning of the sequence of numbers he likes: 1, 4, 8, 9, ....\n\nFor a given number n, count the number of integers from 1 to n that Polycarp likes. In other words, find the number of such x that x is a square of a positive integer number or a cube of a positive integer number (or both a square and a cube simultaneously).\n\nInput\n\nThe first line contains an integer t (1 \u2264 t \u2264 20) \u2014 the number of test cases.\n\nThen t lines contain the test cases, one per line. Each of the lines contains one integer n (1 \u2264 n \u2264 10^9).\n\nOutput\n\nFor each test case, print the answer you are looking for \u2014 the number of integers from 1 to n that Polycarp likes.\n\nExample\n\nInput\n\n\n6\n10\n1\n25\n1000000000\n999999999\n500000000\n\n\nOutput\n\n\n4\n1\n6\n32591\n32590\n23125"
input_ids = tokenizer(text, return_tensors="pt").input_ids

In [15]:
import utils

sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=2048,
#         min_length=1000,
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)


Output:
----------------------------------------------------------------------------------------------------
0: t = int(raw_input())
for i in range(t):
    n = int(raw_input())
    print(n * (n-1))
50
1: t=int(input())
for i in range(t):
    n=int(input())
    print(n-n)
50
2: for _ in range(int(input())):
	n = int(input())
	m = 0
	while n!= 1:
		m += 1
		n -= 1
	print m
50


In [16]:
import utils

sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=2048,
        min_length=500,
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)


Output:
----------------------------------------------------------------------------------------------------
0: t = int(raw_input())
for _ in range(t):
    n = int(raw_input())
    print (n - (n * (n + 1)) / 2) * (n + 1) + 1)


if __name__ == '__main__':
    for _ in range(t):
        print (n - 1)
    t -= 1
    print (n - 1) * (n - 1) + 1)


    t -= 1

    print (n - 1) * (n - 1) / 2
    print (n - 1)

    t -= 1



if __name__ == '__main__':
    for _ in range(t):
        n = int(raw_input())
        print (n - 1)
        print (n - 1)
        print (n - 1)
        print (n - 1)

        print (n - 1)
        print (n + 1) + 1
    t -= 1

    for i in range(n):
        print (n - 1)
        print (n - 1)
        print (n)
        print (n + 1)
    for _ in range(n - 1)
        print (n + 1)
    t -= 1)

        print (n + 1)
    print (n - 1)
        print (n - 1)
        print (n)
        print (n - 1)
    for i in range(1, n + 1)
    print (n - 1)
        print (n - 1)

        p

In [17]:
text = "RATING: 800 \n TAGS: implementation, math \n LANGUAGE IS cpp \n CORRECT SOLUTION \n Polycarp likes squares and cubes of positive integers. Here is the beginning of the sequence of numbers he likes: 1, 4, 8, 9, ....\n\nFor a given number n, count the number of integers from 1 to n that Polycarp likes. In other words, find the number of such x that x is a square of a positive integer number or a cube of a positive integer number (or both a square and a cube simultaneously).\n\nInput\n\nThe first line contains an integer t (1 \u2264 t \u2264 20) \u2014 the number of test cases.\n\nThen t lines contain the test cases, one per line. Each of the lines contains one integer n (1 \u2264 n \u2264 10^9).\n\nOutput\n\nFor each test case, print the answer you are looking for \u2014 the number of integers from 1 to n that Polycarp likes.\n\nExample\n\nInput\n\n\n6\n10\n1\n25\n1000000000\n999999999\n500000000\n\n\nOutput\n\n\n4\n1\n6\n32591\n32590\n23125"
input_ids = tokenizer(text, return_tensors="pt").input_ids

In [18]:
import utils

sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=2048,
        min_length=500,
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)


Output:
----------------------------------------------------------------------------------------------------
0: #include <bits/stdc++.h>
using namespace std;
const int MAXN = 2e5 + 10;
int n;
int main() {
  ios_base::sync_with_stdio(false);
  cin.tie(NULL);
  cout.tie(NULL);
  cout.precision(20);
  cout << fixed;
  cout << fixed;
  cout << fixed;
  return 0;
}
int main() {
  cin >> n;
  for (int i = 1; i <= n; i++) {
    int ans = 0;
    cout << i << endl;
    if (n % i == 0) ans++;
    cout << ans << endl;
    if (n == 1) ans++;
    cout << "1 1";
    cout.setnow();
  }
  int t = 1;
  cout << n;
  if (n == 1) {
    cout << 1;
    cout << "1 3";
  }
  cout << "3 1";
  }
  if (n == 1) {
    cout << "1 5");
  }
  cout << n << endl;
    cout << 2;
    if (n == 3) {
      cout << '0 1' << endl;
    cout << n;
    if (n % 2 == 1) {
      cout.flush();
    cout.write(n / 2)
    cout.precision(20) return 0;
    cout.setnow();
    cout.flush();
    cout << endl;
    cout.setnow(n + 1) cout.set

In [19]:
text = "RATING: 800 \n TAGS: dp \n LANGUAGE IS python \n CORRECT SOLUTION \n Polycarp likes squares and cubes of positive integers. Here is the beginning of the sequence of numbers he likes: 1, 4, 8, 9, ....\n\nFor a given number n, count the number of integers from 1 to n that Polycarp likes. In other words, find the number of such x that x is a square of a positive integer number or a cube of a positive integer number (or both a square and a cube simultaneously).\n\nInput\n\nThe first line contains an integer t (1 \u2264 t \u2264 20) \u2014 the number of test cases.\n\nThen t lines contain the test cases, one per line. Each of the lines contains one integer n (1 \u2264 n \u2264 10^9).\n\nOutput\n\nFor each test case, print the answer you are looking for \u2014 the number of integers from 1 to n that Polycarp likes.\n\nExample\n\nInput\n\n\n6\n10\n1\n25\n1000000000\n999999999\n500000000\n\n\nOutput\n\n\n4\n1\n6\n32591\n32590\n23125"
input_ids = tokenizer(text, return_tensors="pt").input_ids

In [20]:
import utils

sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=2048,
        min_length=500,
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)


Output:
----------------------------------------------------------------------------------------------------
0: #!/usr/bin/env python

import sys
from collections import deque

def main():
    t = int(sys.stdin.readline())
    for _ in range(t):
        n = int(sys.stdin.readline())
        if n % 2!= 0:
            print n / 2
            continue
        if n == 3:
            print n / 2
            continue
        elif n == 5:
            print n / 2
            continue
        if n == 7:
            print n / 2
            continue
        else:
            print n - 3
            continue
        if n % 2 == 0:
            print n + 1
            break
        if n % 2 == 0:
            print n
            break
        else:
            if n % 2 == 1:
                print n + 1
            else:
            for i in range(2, n):
                if n % i == 0:
                for i in range(2, n):
                if n % i == 1:
                    return
            if n > n -

In [30]:
from transformers import RobertaTokenizer, T5ForConditionalGeneration, T5Config
import torch

# setting a seed for result reproduceability
torch.manual_seed(0)

config = T5Config.from_pretrained('Salesforce/codet5-base')
tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base')

model = T5ForConditionalGeneration.from_pretrained('savedmodels/codet5_condode_dir')
# model = T5ForConditionalGeneration.from_pretrained('../../CodeT5/CodeT5/sh/saved_models/codecontest/codet5_small_100000_lr10_bs2_src320_trg150_pat3_e30/checkpoint-last')

In [41]:
text = "RATING: 800 \n TAGS: implementation, math \n LANGUAGE IS java \n CORRECT SOLUTION \n Polycarp likes squares and cubes of positive integers. Here is the beginning of the sequence of numbers he likes: 1, 4, 8, 9, ....\n\nFor a given number n, count the number of integers from 1 to n that Polycarp likes. In other words, find the number of such x that x is a square of a positive integer number or a cube of a positive integer number (or both a square and a cube simultaneously).\n\nInput\n\nThe first line contains an integer t (1 \u2264 t \u2264 20) \u2014 the number of test cases.\n\nThen t lines contain the test cases, one per line. Each of the lines contains one integer n (1 \u2264 n \u2264 10^9).\n\nOutput\n\nFor each test case, print the answer you are looking for \u2014 the number of integers from 1 to n that Polycarp likes.\n\nExample\n\nInput\n\n\n6\n10\n1\n25\n1000000000\n999999999\n500000000\n\n\nOutput\n\n\n4\n1\n6\n32591\n32590\n23125"
input_ids = tokenizer(text, return_tensors="pt").input_ids

In [42]:
import utils

sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=2048,
#         min_length=500,
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)


Output:
----------------------------------------------------------------------------------------------------
0: int function ( int arg0 ) { if ( arg0 < 1 ) { return arg0 + 1 ; } if ( arg0 > 2 ) { return arg0 + 3 ; } if ( arg0 < 10 ) { return arg0 + 20 ; } if ( arg0 < 20 ) { return arg0 + 3 ; } if ( arg0 < 10 ) { return arg0 + 1 ; } if ( arg0 > 20 ) { return arg0 + 1 ; } return arg0 - 1 ; }
108
1: int function ( int arg0 ) { if ( arg0 <= 0 ) { return 1 ; } else if ( arg0 < 2 ) { return arg0 + 1 ; } else if ( arg0 == 3 || arg0 == 11 ) { return arg0 + 3 ; } else if ( arg0 <= 20 ) { return arg0 + 1 ; } else { return arg0 - 1 ; } }
108
2: void function ( int arg0 ) { }
108


In [43]:
import utils

sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=2048,
        min_length=500,
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)


Output:
----------------------------------------------------------------------------------------------------
0: int function ( int arg0 ) { if ( arg0 <= 0 ) { return 5 ; } if ( arg0 <= 10 ) { return 20 ; } if ( arg0 <= 20 ) { return ( arg0 + 1 ) * ( arg0 + 1 ) ; } if ( arg0 <= 20 && arg0 <= 2038 ) { return ( arg0 + 1 ) * ( arg0 + 1 ) ; } else { return ( arg0 + 1 ) * ( arg0 + 1 ) ; } } } } } } ) ; } } } } } } } } } } } } } } ) ; return - 1 ; } } } } } } } } } ; } } } } } } } } } } } ; } } } } } } } } } } } / ( "srini_string" ) ; return ( - 1 ) ; } } } } } } } } ] ] ; } } } } } }
1125
1: void function ( ) { final String loc0 = "srini_string" ; final int loc1 = ( int ) ( ( ( ( long ) arg0 ) / ( ( long ) arg0 ) + ( ( long ) arg0 ) / ( ( long ) ( arg0 + ( ( long ) arg0 ) ) ) ; print ( loc0 + "srini_string" ) ; } } } ) ; } } } } ) ; } } } } } } } } } } } } } } } } } }, 1 ) ; } } } } } } } } }, 10 ) ; } } } } } } } ) ; } } } } } } } } } } ) ; } } } } } } } } } } } } } } } ) ; } } } } } } } } 

In [34]:
text = "removes all punctuation marks from a post concode_field_sep int VocabularySize concode_elem_sep int CountofWordsInTwenties concode_elem_sep int CountofWordsInThirties concode_elem_sep StopWord stopWord concode_elem_sep int CountofWordsInTeens concode_field_sep void updateNaiveBayesMapForSinglePost concode_elem_sep int getCountofWordsInTeens concode_elem_sep void setCountofWordsInThirties concode_elem_sep HashMap<String,HashMap<String,Integer>> trainNaiveBayes concode_elem_sep void updateClassWordCount concode_elem_sep void setVocabularySize concode_elem_sep void setCountofWordsInTwenties concode_elem_sep int getVocabularySize concode_elem_sep String[] cleanPost concode_elem_sep void setCountofWordsInTeens concode_elem_sep int getCountofWordsInTwenties concode_elem_sep int getCountofWordsInThirties concode_elem_sep void updateNaiveBayesMap"
input_ids = tokenizer(text, return_tensors="pt").input_ids

In [36]:
import utils

sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=2048,
#         min_length=500,
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)


Output:
----------------------------------------------------------------------------------------------------
0: String [ ] function ( String arg0 ) { String [ ] loc0 = arg0. split ( "srini_string" ) ; for ( int loc1 = 0 ; loc1 < loc0. length ; loc1 ++ ) { if ( loc0 [ loc1 ]. contains ( "srini_string" ) ) { loc0 [ loc1 ] = null ; } } return loc0 ; }
93
1: String [ ] function ( String [ ] arg0 ) { String [ ] loc0 = StringUtils. tokenizeToStringArray ( arg0 ) ; for ( String loc1 : loc0 ) { if ( loc1. startsWith ( "srini_string" ) ) loc0 = loc1. substring ( 0, loc1. length ( ) - 1 ) ; else loc0 = cleanPost ( loc0 ) ; } return loc0 ; }
93
2: String [ ] function ( String [ ] arg0 ) { String [ ] loc0 = StringUtils. tokenizeToStringArray ( arg0 ) ; for ( String loc1 : loc0 ) { if ( loc1. contains ( "srini_string" ) ) { loc0 = Arrays. copyOfRange ( loc0, "srini_string", loc1. length ( ) - 1 ) ; } } return loc0 ; }
93


In [39]:
text = "replace all punctuation marks from a post concode_field_sep int VocabularySize concode_elem_sep int CountofWordsInTwenties concode_elem_sep int CountofWordsInThirties concode_elem_sep StopWord stopWord concode_elem_sep int CountofWordsInTeens concode_field_sep void updateNaiveBayesMapForSinglePost concode_elem_sep int getCountofWordsInTeens concode_elem_sep void setCountofWordsInThirties concode_elem_sep HashMap<String,HashMap<String,Integer>> trainNaiveBayes concode_elem_sep void updateClassWordCount concode_elem_sep void setVocabularySize concode_elem_sep void setCountofWordsInTwenties concode_elem_sep int getVocabularySize concode_elem_sep String[] cleanPost concode_elem_sep void setCountofWordsInTeens concode_elem_sep int getCountofWordsInTwenties concode_elem_sep int getCountofWordsInThirties concode_elem_sep void updateNaiveBayesMap"
input_ids = tokenizer(text, return_tensors="pt").input_ids

In [40]:
import utils

sample_outputs = model.generate(
        input_ids, 
        do_sample=True, 
        max_length=2048,
#         min_length=500,
        top_p=0.95, # dummy example: if the next word probs are "I": 75%, "You": 10%, "We": "5%", etc. etc. with smaller %ages, a value of 0.9 would make the model sample from just these 3 and ignore the rest
        top_k=50, # filter sampling pool to n words
        temperature=0.7,
        num_return_sequences=3 # number of samples to generate
    )

utils.print_generated_sample_outputs(sample_outputs, tokenizer)


Output:
----------------------------------------------------------------------------------------------------
0: String function ( String arg0 ) { String loc0 = arg0. replaceAll ( "srini_string", "srini_string" ) ; String loc1 = stopWord. getText ( ) ; for ( String loc2 : loc0. split ( "srini_string" ) ) { loc1 = loc1. replaceAll ( loc2, "srini_string" ) ; if ( loc1. length ( ) > 0 ) loc0 = loc0. substring ( 0, loc1. length ( ) - 1 ) ; } return loc0 ; }
122
1: String function ( String arg0 ) { if ( stopWord. hasPunct ( ) ) { return arg0. replaceAll ( "srini_string", "srini_string" ) ; } else { return arg0 ; } }
122
2: String function ( String arg0 ) { String loc0 = stopWord. getText ( arg0 ) ; StringTokenizer loc1 = new StringTokenizer ( loc0, "srini_string" ) ; while ( loc1. hasMoreTokens ( ) ) { loc0 = loc1. nextToken ( ) ; } return loc0 ; }
122
