In [1]:
import random
import pandas as pd
import pyterrier as pt 
import pickle
from pyterrier.measures import *
import os
from pathlib import *
import json

### jupyter display setting
# `display`/`HTML` are imported from the public `IPython.display` module; the
# `IPython.core.display` location is deprecated for these names.
from IPython.display import display, HTML
# Let the notebook container span the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# `None` removes the corresponding truncation limit.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# `None` (not -1) is the supported way to disable column-width truncation;
# negative values are deprecated and rejected by newer pandas releases.
pd.set_option('display.max_colwidth', None)

  app.launch_new_instance()


In [8]:
# Registry of every file-system location used by the notebook.
# The dataset folder is resolved to an absolute path (the `dataset` directory
# next to the current working directory's parent); the index, synonym, query
# and qrels locations stay relative to the working directory.
_dataset_root = os.path.join(Path(os.getcwd()).parent.absolute(), 'dataset')
paths = {
    'dataset': _dataset_root,
    'data_txt': os.path.join(_dataset_root, 'data_txt'),
    'data_json': os.path.join(_dataset_root, 'data_json'),
    'data_csv': os.path.join(_dataset_root, 'pyterrier-index.csv'),
    'pt_index': './resume_index',
    'synonymDict': '../dataset/query_expansion.pkl',
    'query': '../dataset/query.csv',
    'qrels': '../dataset/qrel/',
}
paths

{'dataset': '/home/hsuanhs/eecs549/SI650-Final-Project/dataset',
 'data_txt': '/home/hsuanhs/eecs549/SI650-Final-Project/dataset/data_txt',
 'data_json': '/home/hsuanhs/eecs549/SI650-Final-Project/dataset/data_json',
 'data_csv': '/home/hsuanhs/eecs549/SI650-Final-Project/dataset/pyterrier-index.csv',
 'pt_index': './resume_index',
 'synonymDict': '../dataset/query_expansion.pkl',
 'query': '../dataset/query.csv',
 'qrels': '../dataset/qrel/'}

# Load profiles (documents), queries and qrels, then build the index

In [5]:
def genRandomQrels(df_query, docnos=None):
    """Generate random placeholder relevance judgements for every (query, document) pair.

    Parameters
    ----------
    df_query : pandas.DataFrame
        Query table; only the number of entries in its ``query`` column is
        used. Queries are numbered 1..N in row order.
    docnos : iterable, optional
        Document identifiers to judge. When omitted, the ``docno`` column of
        the notebook-level ``profiles`` frame is used, which is what the
        original one-argument call did.

    Returns
    -------
    pandas.DataFrame
        Columns ``qid`` (str), ``docno`` (str), ``label`` (int in 0..5) and
        ``iteration`` (str form of a value in {0.5, 1.0, 1.5, 2.0, 2.5}).
        The values are random; call ``random.seed`` first for repeatable output.
    """
    if docnos is None:
        # Backward-compatible fallback to the global frame.
        docnos = profiles['docno']
    docnos = list(docnos)

    rows = []
    for qid, _query in enumerate(df_query['query'], start=1):
        for docno in docnos:
            # Draw label first, then iteration, so a seeded run reproduces the
            # same sequence as before.
            label = random.randint(0, 5)
            iteration = random.randint(1, 5) / 2
            rows.append([str(qid), str(docno), int(label), str(iteration)])
    return pd.DataFrame(rows, columns=["qid", "docno", "label", 'iteration'])

def JsontoDataFrame(profiles_json, split=True, min_words=5):
    """Flatten parsed profile JSON into one row per experience bullet point.

    Parameters
    ----------
    profiles_json : iterable of dict
        Each profile must provide ``"Education"`` (any JSON-serialisable value)
        and ``"Experience"`` (a list of dicts with ``company``, ``title``,
        ``period`` and ``description`` keys).
    split : bool, default True
        When true, each description is split on newlines and every line becomes
        a candidate document; otherwise the whole description is one candidate.
    min_words : int, default 5
        Candidates with fewer whitespace-separated tokens than this are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns ``docno`` (a running counter rendered as str), ``text``,
        ``education`` (JSON string), ``company``, ``title``, ``period``.
        An empty input yields an empty frame with these same columns.

    Raises
    ------
    KeyError
        If a profile or experience block lacks one of the required keys.
    """
    # Fixed up front so the frame has a stable schema even when there is
    # nothing to iterate over.
    columns = ['docno', 'text', 'education', 'company', 'title', 'period']
    rows = []
    docno = 0
    for profile in profiles_json:
        education = json.dumps(profile["Education"])
        for expBlock in profile["Experience"]:
            company = expBlock["company"]
            title = expBlock["title"]
            period = expBlock["period"]
            description = expBlock["description"]
            if not description:
                # Experience entries without a description contribute no documents.
                continue
            bulletpoints = description.split('\n') if split else [description]
            for bulletpoint in bulletpoints:
                if len(bulletpoint.split()) < min_words:
                    continue
                rows.append([str(docno), bulletpoint, education, company, title, period])
                docno += 1
    return pd.DataFrame(rows, columns=columns)

def loadProfiles(paths):
    """Load the document (profile) table.

    If no Terrier index exists yet (no ``data.properties`` file under
    ``paths['pt_index']``), every file in ``paths['data_json']`` is parsed and
    flattened with ``JsontoDataFrame``. Otherwise the previously exported table
    at ``paths['data_csv']`` is read instead.

    Parameters
    ----------
    paths : dict
        Must contain ``pt_index`` plus ``data_json`` (first case) or
        ``data_csv`` (second case).

    Returns
    -------
    pandas.DataFrame
        Profile table whose ``docno`` column holds strings.
    """
    if not os.path.exists(paths['pt_index'] + "/data.properties"):
        # get raw json files & preprocess
        profiles_json = []
        # `os.listdir` returns entries in arbitrary order; sorting keeps the
        # docno numbering assigned downstream stable across runs and machines.
        for filename in sorted(os.listdir(paths['data_json'])):
            path_profile = os.path.join(paths['data_json'], filename)
            # Explicit UTF-8: the profiles contain non-ASCII characters, which
            # a platform-default codec may fail to decode.
            with open(path_profile, encoding='utf-8') as ptr:
                profiles_json.append(json.load(ptr))
        profiles = JsontoDataFrame(profiles_json)

    else:
        profiles = pd.read_csv(paths['data_csv'])
        # read_csv infers integers for docno; the rest of the notebook uses strings.
        profiles['docno'] = profiles['docno'].astype(str)
    return profiles

def loadSynonymDict(paths):
    """Unpickle and return the object stored at ``paths['synonymDict']``.

    The file is opened in a ``with`` block so the handle is closed even if
    unpickling fails.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only load files
    this project produced itself.
    """
    with open(paths['synonymDict'], 'rb') as file:
        return pickle.load(file)

def loadQueries(paths):
    """Read the query CSV at ``paths['query']`` and return it with ``qid`` cast to str."""
    raw_queries = pd.read_csv(paths['query'])
    return raw_queries.assign(qid=raw_queries['qid'].astype(str))

def loadQrels(paths):
    """Assemble the relevance judgements from the seven per-query CSV files.

    Reads ``query - 1.csv`` … ``query - 7.csv`` from the ``paths['qrels']``
    prefix (printing each path as it is read), stacks them, renames ``score``
    to ``label``, drops the descriptive document columns, replaces missing
    values with -1 and returns a frame with ``qid``/``docno`` as str and
    ``label`` as int.
    """
    csv_paths = [paths['qrels'] + f'query - {i}.csv' for i in range(1, 8)]

    per_query = []
    for csv_path in csv_paths:
        print(csv_path)
        per_query.append(pd.read_csv(csv_path))

    merged = (
        pd.concat(per_query)
        .rename(columns={"score": "label"})
        .drop(columns=['education', 'company', 'title', 'period', 'text'])
        .reset_index(drop=True)
        .fillna(-1)  # missing values (e.g. unjudged rows) become -1
    )
    return merged.astype({'qid': str, 'docno': str, 'label': int})
    
def expandQuery(df_query, synonymDict, weight = 10):
    """Expand every query with its synonyms, up-weighting the original text.

    Each value in ``df_query['query']`` is replaced by the original query
    repeated ``weight`` times followed by its synonyms, joined by spaces.

    Parameters
    ----------
    df_query : pandas.DataFrame
        Query table with a ``query`` column. It is modified in place and also
        returned.
    synonymDict : mapping
        Maps each original query string to a list of synonym strings.
    weight : int, default 10
        Number of times the original query is repeated.

    Returns
    -------
    pandas.DataFrame
        The same ``df_query`` object with its ``query`` column rewritten.

    Raises
    ------
    KeyError
        If a query has no entry in ``synonymDict``. This includes an
        already-expanded query, so always start from freshly loaded queries.
    """
    # Assign the whole column at once. The previous element-wise chained
    # assignment (df_query['query'][ind] = ...) triggers SettingWithCopyWarning
    # and is a silent no-op when pandas Copy-on-Write is active.
    df_query['query'] = [
        ' '.join([query] * weight + synonymDict[query])
        for query in df_query['query']
    ]
    return df_query

def indexing(paths, pt, profiles):
    """Return the Terrier index for the profiles, building it on first use.

    If ``data.properties`` already exists under ``paths['pt_index']`` the
    on-disk index is opened. Otherwise a new index is created in that directory
    from ``profiles["text"]`` / ``profiles["docno"]`` via ``pt.DFIndexer``.
    Either way the collection statistics and the lexicon entry for the term
    "python" are printed before the index is returned.
    """
    index_dir = paths['pt_index']
    already_built = os.path.exists(paths['pt_index'] + "/data.properties")

    if already_built:
        # Reuse the index already on disk.
        index = pt.IndexFactory.of(index_dir)
    else:
        # No index files yet: build them, then open the fresh index.
        builder = pt.DFIndexer(index_dir, overwrite=True)
        index = pt.IndexFactory.of(builder.index(profiles["text"], profiles["docno"]))

    # Collection statistics, followed by the lexicon entry for "python".
    print(index.getCollectionStatistics().toString())
    print(index.getLexicon()["python"])

    return index

### initialization
# Initialise PyTerrier only if it has not been started yet, so re-running
# this cell in a live kernel is harmless.
if not pt.started():
    pt.init()

# Load the inputs: documents, synonym table, queries expanded with an
# original-query weight of 20, relevance judgements, and the index.
profiles = loadProfiles(paths)
synonymDict = loadSynonymDict(paths)
df_query = expandQuery(loadQueries(paths), synonymDict, 20)
qrels = loadQrels(paths)
index = indexing(paths, pt, profiles)

# One retriever per weighting model.
bm25, tfidf, dph = [
    pt.BatchRetrieve(index, wmodel=wmodel) for wmodel in ("BM25", "TF_IDF", "DPH")
]

# Query-expansion pipelines: a first DPH pass, Bo1 query rewriting, then the
# final ranker.
bo1 = pt.rewrite.Bo1QueryExpansion(index)
pipeline_QE_bm25, pipeline_QE_tfidf, pipeline_QE_dph = [
    dph >> bo1 >> final_ranker for final_ranker in (bm25, tfidf, dph)
]


../dataset/qrel/query - 1.csv
../dataset/qrel/query - 2.csv
../dataset/qrel/query - 3.csv
../dataset/qrel/query - 4.csv
../dataset/qrel/query - 5.csv
../dataset/qrel/query - 6.csv
../dataset/qrel/query - 7.csv
Number of documents: 363
Number of terms: 1510
Number of postings: 4637
Number of fields: 0
Number of tokens: 4906
Field names: []
Positions:   false

term132 Nt=47 TF=48 maxTF=2 @{0 4369 1}


In [6]:
# Preview only the first rows: with `display.max_rows` set to None, a bare
# `profiles` renders every document in the collection.
profiles.head(10)

Unnamed: 0,docno,text,education,company,title,period
0,0,Analyzed performance on computer cache and improved it by using machine learning algorithms,"[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",MediaTek Internship,Software Development Intern,May 2021 – Aug 2021
1,1,"• Utilized perceptron learning algorithm on cache reuse prediction and constructed a cache predictor in Python, which improved the prediction accuracy in the cache and reduced the cache miss rate.","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",MediaTek Internship,Software Development Intern,May 2021 – Aug 2021
2,2,• Reduced 24% of cache miss rate compared to LRU cache and improved 6% of the cache access performance over LRU cache.,"[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",MediaTek Internship,Software Development Intern,May 2021 – Aug 2021
3,3,"Cooperated with hardware engineers and customers, building software architectures and embedded systems","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]","Aslent Technology Co., Ltd Full-time",Software Engineer,Mar 2020 – Nov 2020
4,4,"• Engineered a wafer auto-storage system to arrange the wafer and monitor the status automatically in Python, which minimized labor cost by 20% and risks of operation by 30%.","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]","Aslent Technology Co., Ltd Full-time",Software Engineer,Mar 2020 – Nov 2020
5,5,• Built a controller in C++ integrating different types of readers into one device to reduce hardware cost by 30%.,"[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]","Aslent Technology Co., Ltd Full-time",Software Engineer,Mar 2020 – Nov 2020
6,6,Constructed and maintained automated production systems,"[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",TSMC Full-time,Backend Software Engineer,Sep 2014 – Oct 2018
7,7,"• Built and Boosted backend services on semiconductor process automated systems by using C++, which achieved full process automation and conserved labor cost by 10% while improving process yield by 23%.","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",TSMC Full-time,Backend Software Engineer,Sep 2014 – Oct 2018
8,8,"• Established a report auto-generation system in Python, which reduced labor costs by 12%.","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",TSMC Full-time,Backend Software Engineer,Sep 2014 – Oct 2018
9,9,"• Built learning ability assessment platform, helping 1K daily active users improve academic performance","[{""school"": ""University of Illinois Urbana-Champaign"", ""degree"": ""Master's degree"", ""major"": ""Computer Science"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science"", ""period"": ""2015 \u2013 2018""}]",QSticker Full-time,"Co-founder, Full Stack Engineer",Aug 2020 – Jul 2021


In [4]:
# Baseline run: queries are re-read from disk and used as-is, without
# synonym expansion.
df_query = loadQueries(paths)
systems = [tfidf, bm25, dph, pipeline_QE_tfidf, pipeline_QE_bm25, pipeline_QE_dph]
system_names = ["TF_IDF", "BM25", "dph", "bo1 + TF_IDF", "bo1 + BM25", "bo1 + dph"]
# `.loc[1:7]` is a label slice, inclusive at both ends (rows labelled 1..7).
pt.Experiment(
    systems,
    df_query.loc[1:7],
    qrels,
    names=system_names,
    eval_metrics=["map", "ndcg", nDCG@5, nDCG@10],
)

Unnamed: 0,name,map,ndcg,nDCG@5,nDCG@10
0,TF_IDF,0.025759,0.098167,0.148718,0.114629
1,BM25,0.025759,0.098167,0.148718,0.114629
2,dph,0.023076,0.091072,0.120454,0.119648
3,bo1 + TF_IDF,0.050509,0.198308,0.148718,0.11381
4,bo1 + BM25,0.050299,0.198035,0.148718,0.11381
5,bo1 + dph,0.048121,0.194492,0.134921,0.111127


In [22]:
# Sweep the original-query weight from 1 to 60 and print the evaluation table
# for each setting. Queries are reloaded on every pass because expandQuery
# rewrites the frame it is given.
separator = '--------------------------'
for weight in range(1, 61):
    df_query = expandQuery(loadQueries(paths), synonymDict, weight)
    results = pt.Experiment(
        [tfidf, bm25, dph, pipeline_QE_tfidf, pipeline_QE_bm25, pipeline_QE_dph],
        df_query.loc[1:7],
        qrels,
        names=["TF_IDF", "BM25", "dph", "bo1 + TF_IDF", "bo1 + BM25", "bo1 + dph"],
        eval_metrics=["map", "ndcg", nDCG@5, nDCG@10],
    )
    print(separator, weight, separator)
    print(results)

-------------------------- 1 --------------------------
           name       map      ndcg    nDCG@5   nDCG@10
0  TF_IDF        0.369951  0.656799  0.581156  0.524285
1  BM25          0.377461  0.667069  0.591084  0.550526
2  dph           0.383143  0.661338  0.545207  0.549236
3  bo1 + TF_IDF  0.397088  0.687902  0.570440  0.547335
4  bo1 + BM25    0.402704  0.696553  0.588825  0.559666
5  bo1 + dph     0.403589  0.686900  0.535664  0.567887
-------------------------- 2 --------------------------
           name       map      ndcg    nDCG@5   nDCG@10
0  TF_IDF        0.453789  0.725192  0.674454  0.665574
1  BM25          0.447783  0.719462  0.667053  0.649914
2  dph           0.452004  0.709559  0.616871  0.652351
3  bo1 + TF_IDF  0.451731  0.721322  0.647393  0.589281
4  bo1 + BM25    0.453209  0.721753  0.644916  0.588203
5  bo1 + dph     0.465060  0.724126  0.608699  0.638309
-------------------------- 3 --------------------------
           name       map      ndcg    nDCG@5   

-------------------------- 20 --------------------------
           name       map      ndcg    nDCG@5   nDCG@10
0  TF_IDF        0.542146  0.784880  0.818867  0.762084
1  BM25          0.549919  0.788391  0.822785  0.765156
2  dph           0.543937  0.786391  0.837252  0.741675
3  bo1 + TF_IDF  0.499338  0.792827  0.822785  0.704586
4  bo1 + BM25    0.499379  0.792252  0.800918  0.702800
5  bo1 + dph     0.497638  0.793113  0.800918  0.724927
-------------------------- 21 --------------------------
           name       map      ndcg    nDCG@5   nDCG@10
0  TF_IDF        0.541928  0.784807  0.818867  0.762084
1  BM25          0.546279  0.786060  0.818867  0.762613
2  dph           0.543756  0.786197  0.837252  0.741675
3  bo1 + TF_IDF  0.499158  0.792752  0.822785  0.704586
4  bo1 + BM25    0.498841  0.791995  0.800918  0.702800
5  bo1 + dph     0.496697  0.792549  0.800918  0.724927
-------------------------- 22 --------------------------
           name       map      ndcg    nDCG@5

-------------------------- 39 --------------------------
           name       map      ndcg    nDCG@5   nDCG@10
0  TF_IDF        0.542608  0.793551  0.832328  0.773705
1  BM25          0.545790  0.794620  0.832328  0.774235
2  dph           0.554660  0.805722  0.870470  0.771243
3  bo1 + TF_IDF  0.496974  0.791905  0.822785  0.715144
4  bo1 + BM25    0.497094  0.793376  0.825263  0.717006
5  bo1 + dph     0.491797  0.790144  0.798441  0.718237
-------------------------- 40 --------------------------
           name       map      ndcg    nDCG@5   nDCG@10
0  TF_IDF        0.542608  0.793614  0.832328  0.773705
1  BM25          0.545712  0.794609  0.832328  0.774235
2  dph           0.554660  0.805722  0.870470  0.771243
3  bo1 + TF_IDF  0.496917  0.791886  0.822785  0.715144
4  bo1 + BM25    0.497057  0.793366  0.825263  0.717006
5  bo1 + dph     0.491640  0.790079  0.798441  0.718237
-------------------------- 41 --------------------------
           name       map      ndcg    nDCG@5

-------------------------- 58 --------------------------
           name       map      ndcg    nDCG@5   nDCG@10
0  TF_IDF        0.542021  0.793490  0.832328  0.773705
1  BM25          0.544608  0.794422  0.832328  0.774235
2  dph           0.557231  0.807447  0.870470  0.783253
3  bo1 + TF_IDF  0.495733  0.791301  0.822785  0.715144
4  bo1 + BM25    0.496817  0.793114  0.825263  0.717006
5  bo1 + dph     0.491735  0.790289  0.798441  0.718237
-------------------------- 59 --------------------------
           name       map      ndcg    nDCG@5   nDCG@10
0  TF_IDF        0.541990  0.793485  0.832328  0.773705
1  BM25          0.544608  0.794422  0.832328  0.774235
2  dph           0.557231  0.807447  0.870470  0.783253
3  bo1 + TF_IDF  0.495730  0.791292  0.822785  0.715144
4  bo1 + BM25    0.496745  0.793083  0.825263  0.717006
5  bo1 + dph     0.491661  0.790275  0.798441  0.718237
-------------------------- 60 --------------------------
           name       map      ndcg    nDCG@5

In [None]:


# Run the first query (row label 0) through BM25 and show the top hits.
query_tmp = df_query.loc[0, 'query']
results = bm25.search(query_tmp)

# Look texts up by the external `docno` the retriever reports, rather than
# using the internal `docid` as a row position in `profiles`; the two only
# coincide while the frame order matches the indexing order.
# NOTE(review): assumes `results` exposes a `docno` column, as PyTerrier
# retrievers normally do — confirm for the installed version.
text_by_docno = profiles.drop_duplicates('docno').set_index('docno')['text']

print('query:')
print(query_tmp)
print('\nsearch results (top10):')
for rank, docno in enumerate(results['docno'].head(10)):
    print('----------------------------------rank %d, docno %s ----------------------------------' % (rank, docno))
    print(text_by_docno[str(docno)])
    print()

In [14]:
# Check the scalar type stored in the label column (row label 1).
type(qrels.loc[1, 'label'])

numpy.int64

In [15]:
# Total of all labels; -1 placeholders from fillna count against the sum.
qrels['label'].sum()

-1929

In [16]:
# Inspect the label column (-1 marks values that were missing in the CSVs).
qrels.loc[:, 'label']

0      -1
1      -1
2      -1
3      -1
4      -1
5      -1
6      -1
7      -1
8      -1
9      -1
10      1
11     -1
12      1
13      2
14     -1
15     -1
16      1
17      1
18      1
19      1
20      1
21     -1
22      1
23     -1
24      1
25     -1
26     -1
27     -1
28     -1
29     -1
30     -1
31     -1
32      1
33     -1
34     -1
35     -1
36     -1
37     -1
38     -1
39     -1
40     -1
41     -1
42     -1
43      1
44     -1
45      2
46     -1
47     -1
48     -1
49     -1
50     -1
51     -1
52     -1
53     -1
54     -1
55      2
56     -1
57     -1
58     -1
59      1
60      1
61     -1
62     -1
63      2
64     -1
65     -1
66     -1
67     -1
68     -1
69     -1
70      1
71     -1
72      2
73      2
74     -1
75     -1
76      1
77     -1
78     -1
79     -1
80     -1
81     -1
82     -1
83     -1
84     -1
85     -1
86     -1
87     -1
88     -1
89     -1
90     -1
91      2
92     -1
93     -1
94     -1
95     -1
96      2
97     -1
98     -1
99     -1


In [17]:
# The query rows passed to pt.Experiment (label slice, inclusive: rows 1..7).
df_query.loc[1:7, :]

Unnamed: 0,qid,query
1,2,Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp Information retrieval nlp term document incidence matrix clustering document matrix cross lingual inverted index extraction natural language processing python vector stp parsing retrieval technique boolean retrieval github vector space information extraction
2,3,teamwork teamwork teamwork teamwork teamwork teamwork teamwork teamwork teamwork teamwork teamwork teamwork teamwork clipart collaboration leadership background animated sports icon hands transparent communication vector puzzle office logo basketball healthcare construction military inspirational project management illustration safety nursing black powerpoint football unity classroom cooperation disney
3,4,web front end web front end web front end web front end web front end web front end web front end web front end web front end web front end web front end web front end web front end back end logo coding app back backend developer vs backend programming frameworks node js web developer database website technologies css application web application design
4,5,aws cloud aws cloud aws cloud aws cloud aws cloud aws cloud aws cloud aws cloud aws cloud aws cloud aws cloud aws cloud aws cloud logo transparent icon amazon diagram architecture azure security vpc hybrid visio infrastructure services background hosting network disaster recovery migration application iot data solution load balancer platform performance certification on premise deployment management journey
5,6,machine learning machine learning machine learning machine learning machine learning machine learning machine learning machine learning machine learning machine learning machine learning machine learning machine learning artificial intelligence classification deep learning supervised algorithm neural network data python big data wallpaper regression diagram icon decision tree unsupervised infographic training prediction robot cheat sheet natural language processing cloud anomaly detection azure ensemble difference iot visualization marketing computer vision
6,7,data visualization data visualization data visualization data visualization data visualization data visualization data visualization data visualization data visualization data visualization data visualization data visualization data visualization infographic graph dashboard network map art beautiful design tableau excel time simple creative health comparison circle tree interactive big data charts business intelligence processing cool pie chart complex marketing music stunning color heat map
7,8,sdk platform build sdk platform build sdk platform build sdk platform build sdk platform build sdk platform build sdk platform build sdk platform build sdk platform build sdk platform build sdk platform build sdk platform build sdk platform build studio tools emulator visual studio emulator unity android studio software development kit flutter xamarin android emulator api level react native xamarin android build tools platforms android xamarin forms


In [23]:
# Preview only the first rows: with `display.max_rows` set to None, a bare
# `profiles` renders every document in the collection.
profiles.head(10)

Unnamed: 0,docno,text,education,company,title,period
0,0,Analyzed performance on computer cache and improved it by using machine learning algorithms,"[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",MediaTek Internship,Software Development Intern,May 2021 – Aug 2021
1,1,"• Utilized perceptron learning algorithm on cache reuse prediction and constructed a cache predictor in Python, which improved the prediction accuracy in the cache and reduced the cache miss rate.","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",MediaTek Internship,Software Development Intern,May 2021 – Aug 2021
2,2,• Reduced 24% of cache miss rate compared to LRU cache and improved 6% of the cache access performance over LRU cache.,"[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",MediaTek Internship,Software Development Intern,May 2021 – Aug 2021
3,3,"Cooperated with hardware engineers and customers, building software architectures and embedded systems","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]","Aslent Technology Co., Ltd Full-time",Software Engineer,Mar 2020 – Nov 2020
4,4,"• Engineered a wafer auto-storage system to arrange the wafer and monitor the status automatically in Python, which minimized labor cost by 20% and risks of operation by 30%.","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]","Aslent Technology Co., Ltd Full-time",Software Engineer,Mar 2020 – Nov 2020
5,5,• Built a controller in C++ integrating different types of readers into one device to reduce hardware cost by 30%.,"[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]","Aslent Technology Co., Ltd Full-time",Software Engineer,Mar 2020 – Nov 2020
6,6,Constructed and maintained automated production systems,"[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",TSMC Full-time,Backend Software Engineer,Sep 2014 – Oct 2018
7,7,"• Built and Boosted backend services on semiconductor process automated systems by using C++, which achieved full process automation and conserved labor cost by 10% while improving process yield by 23%.","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",TSMC Full-time,Backend Software Engineer,Sep 2014 – Oct 2018
8,8,"• Established a report auto-generation system in Python, which reduced labor costs by 12%.","[{""school"": ""Carnegie Mellon University"", ""degree"": ""Master of Science - MS"", ""major"": ""Computer Software Engineering"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Master of Science - MS"", ""major"": ""Communication engineering"", ""period"": ""2012 \u2013 2014""}, {""school"": ""National Sun Yat-Sen University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science and Engineering"", ""period"": ""2008 \u2013 2012""}]",TSMC Full-time,Backend Software Engineer,Sep 2014 – Oct 2018
9,9,"• Built learning ability assessment platform, helping 1K daily active users improve academic performance","[{""school"": ""University of Illinois Urbana-Champaign"", ""degree"": ""Master's degree"", ""major"": ""Computer Science"", ""period"": ""2021 \u2013 2022""}, {""school"": ""National Tsing Hua University"", ""degree"": ""Bachelor of Science - BS"", ""major"": ""Computer Science"", ""period"": ""2015 \u2013 2018""}]",QSticker Full-time,"Co-founder, Full Stack Engineer",Aug 2020 – Jul 2021
