## Install MySQLClient

In [1]:
!pip install mysqlclient



## Connect to database
Credentials are provided by the server owner. Accounts only have `SELECT` privileges on the databases they are authorized to access. Note that the connection information in this tutorial may differ from that of the permanent server.

In [2]:
import MySQLdb

In [3]:
import os
from getpass import getpass

# Connection settings are read from the environment so that real credentials
# never have to be written into (or committed with) the notebook. The
# fallbacks for host, port, user and database are this tutorial's local values.
# The password has no fallback: if MYSQL_PASSWORD is unset you are prompted.
db = MySQLdb.connect(
    host=os.environ.get('MYSQL_HOST', '127.0.0.1'),
    port=int(os.environ.get('MYSQL_PORT', '3306')),
    user=os.environ.get('MYSQL_USER', 'root'),
    # `password` / `database` replace the deprecated `passwd` / `db` aliases.
    password=os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password: '),
    database=os.environ.get('MYSQL_DATABASE', 'djangostack'),
)

### Disconnect from database
When done, be sure to call `db.close()` to safely terminate the connection.

## Database information

TODO: Replace with images of model?

*MySQLdb Documentation*: https://mysqlclient.readthedocs.io/user_guide.html

### Table 1: project
1. id
2. source_url
3. name
4. last_updated	
5. fork_of_id

### Table 2: author
1. id
2. username
3. email

### Table 3*: project_has_author
1. id
2. author_id
3. project_id

### Table 4: commit
1. id
2. datetime
3. author_id
4. project_id
5. message
6. hash
7. branch

### Table 5: diff
1. id
2. file_path
3. language
4. commit_id
5. body

---

***Not yet implemented tables, subject to change***

### Table 6: person
1. id
2. alias
3. email
4. github_username
5. gitlab_username

### Table 7*: person_has_author
1. id
2. author_id
3. person_id

### Table 8: issue

### Table 9: issue_comment

### Table 10: pull_request

\* Bridge table

## Cursor object
To execute queries, create a cursor. As with the connection, be sure to call `cursor.close()` when you are done. A cursor can execute multiple queries, so there is no need to create a new one for each query.

In [4]:
# Create one cursor from the open connection; the examples below all reuse it.
cursor = db.cursor()

## Example 1: Viewing projects (and other tables)


In [5]:
# Select every column of the `project` table and display the first row only.
query = 'select * from project' # MySQL keywords are case-insensitive
cursor.execute(query) # Run the query; rows are then read with the fetch* methods
cursor.fetchone() # One row as a tuple; alternatively, use fetchall(), fetchmany(n)

(20,
 'https://github.com/spotify/dockerfile-maven.git',
 'dockerfile-maven',
 datetime.datetime(2021, 2, 2, 1, 43, 8),
 None)

## Example 2: Viewing authors associated with a project

In [6]:
url = 'https://github.com/HPCL/ideas-uo.git'
# Walk author -> project_has_author (bridge table) -> project and keep the
# authors linked to the project with the given source URL.
# Every column is qualified with its table alias so the query stays
# unambiguous across the three joined tables. The URL is passed as a
# parameter (%s) rather than formatted into the SQL string.
query = '''select a.username, a.email
from author a
join project_has_author pha on (a.id = pha.author_id)
join project p on (pha.project_id = p.id)
where p.source_url = %s
'''
cursor.execute(query, (url,))
cursor.fetchall()

(('Carter Perkins', 'cartersperkins@gmail.com'),
 ('Bosco Ndemeye', 'ndemeye@cs.uoregon.edu'),
 ('Jason Prideaux', 'jprideau@cs.uoregon.edu'),
 ('Boyana Norris', 'brnorris03@gmail.com'),
 ('Boyana Norris', 'brnorris03@users.noreply.github.com'),
 ('Boyana Norris', 'norris@cs.uoregon.edu'),
 ('fickas', 'fickas@cs.uoregon.edu'),
 ('Stephen Fickas', 'fickas@cs.uoregon.edu'))

## Example 3: Viewing commits by an author in a project

In [7]:
url = 'https://github.com/HPCL/ideas-uo.git'
name = 'Carter Perkins'
# Join each commit to its author and project, then filter on both the author's
# username and the project's source URL. All columns carry a table alias,
# including `a.username` in the WHERE clause, to match the SELECT list.
query = '''select c.id, c.datetime, a.username, p.name, c.message, c.hash, c.branch
from commit c
join author a on (c.author_id = a.id)
join project p on (c.project_id = p.id)
where a.username = %s and p.source_url = %s'''
# Parameters are bound in the order the %s placeholders appear.
cursor.execute(query, (name, url))
cursor.fetchmany(3)  # show at most three of the matching rows

((685,
  datetime.datetime(2021, 2, 1, 19, 53, 2),
  'Carter Perkins',
  'ideas-uo',
  'django update',
  'e6b6668bd11cf9133f312c03f260cc1807320c69',
  'master'),
 (686,
  datetime.datetime(2021, 2, 1, 19, 50, 54),
  'Carter Perkins',
  'ideas-uo',
  'db update script changes',
  '47889b75a0307111cc423662b7a555477e431e2c',
  'master'),
 (687,
  datetime.datetime(2021, 1, 3, 19, 8, 41),
  'Carter Perkins',
  'ideas-uo',
  'Init Django database files',
  'fbc753c834566d96d36b68058f68bb9f82e0fea4',
  'master'))

## Example 4: Viewing commits between two dates in a project

In [8]:
import datetime

In [9]:
# Bounds of the date window used by the next query: midnight on
# 1 November 2020 and midnight on 1 December 2020.
start = datetime.datetime(2020, 11, 1)
end = datetime.datetime(2020, 12, 1)

In [10]:
url = 'https://github.com/HPCL/ideas-uo.git'
# Commits of one project whose timestamp falls between `start` and `end`.
# SQL BETWEEN is inclusive at both ends. The WHERE-clause columns are
# qualified with their table alias to match the SELECT list.
query = '''select c.datetime, c.message, a.username
from commit c
join author a on (c.author_id = a.id)
join project p on (c.project_id = p.id)
where (c.datetime between %s and %s) and p.source_url = %s'''
# datetime objects are passed as parameters; the driver handles the conversion.
cursor.execute(query, (start, end, url))
cursor.fetchall()

((datetime.datetime(2020, 11, 4, 14, 17, 23),
  'Implementation for reading any GitHub repo',
  'Carter Perkins'),
 (datetime.datetime(2020, 11, 15, 11, 27, 30),
  'add data to use with lammps high_churn pattern notebook',
  'Bosco Ndemeye'),
 (datetime.datetime(2020, 11, 13, 10, 0, 48),
  'Fixing issue of missing diff info.',
  'Jason Prideaux'))

## Example 5: Viewing diffs for a file in a project

In [11]:
url = 'https://github.com/HPCL/ideas-uo.git'
file = 'database/README.md'
# Diffs that touch one file path within one project, together with the commit
# each diff belongs to. The query filters only on project URL and file path,
# so no author name is needed here.
query = '''select p.name, c.hash, c.message, d.file_path, d.language, d.commit_id, d.body
from diff d
join commit c on (d.commit_id = c.id)
join project p on (c.project_id = p.id)
where p.source_url = %s and d.file_path = %s'''
cursor.execute(query, (url, file))
cursor.fetchone()  # first matching diff only

('ideas-uo',
 'fbc753c834566d96d36b68058f68bb9f82e0fea4',
 'Init Django database files',
 'database/README.md',
 'PLACEHOLDER',
 687,
 "+# Database Information\n+\n+## Directory Structure                   \n+\n+| Name          | Description                                                         |\n+| ------------  | ------------------------------------------------------------------- | \n+| `django/`     | Django project and corresponding applications for the database.     |\n+| `interface/`  | Script for interfacing with database. Use this file in your script. |\n+| `resources/`  | ER diagrams, website description blurbs, and similar files.         |\n+\n+## Interface Usage\n+\n+***NOT YET READY***\n+Built to run on Google Colab (Python 3.6.9). Make sure the packages in `interface/requirements.txt` are installed. Then copy all the python files in `interface/` to your current working directory. \n+\n+```python\n+\n+from db_interface import DatabaseInterface\n+\n+# Add/update a git pro

In [12]:
# Don't forget! Release the cursor first, then close the connection it came from.
cursor.close()
db.close()