Skip to content

Commit

Permalink
Add wip PDB parsers for ATOM, HETATM, TER, DBREF records
Browse files Browse the repository at this point in the history
  • Loading branch information
kMutagene committed May 11, 2022
1 parent c925318 commit 426dee6
Show file tree
Hide file tree
Showing 6 changed files with 2,270 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/BioFSharp.IO/BioFSharp.IO.fsproj
Expand Up @@ -37,6 +37,7 @@
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="GenerateSOFTTypes.fsx" />
<Compile Include="PDB.fs" />
<Compile Include="GAF.fs" />
<Compile Include="AgilentRaw.fs" />
<Compile Include="BlastWrapper.fs" />
Expand Down
458 changes: 458 additions & 0 deletions src/BioFSharp.IO/PDB.fs

Large diffs are not rendered by default.

91 changes: 91 additions & 0 deletions src/BioFSharp.Interactive/test.ipynb
Expand Up @@ -48,6 +48,97 @@
"#r \"nuget: BioFSharp.Interactive, 0.0.0-dev\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "fsharp"
}
},
"outputs": [],
"source": [
"\n",
"open BioFSharp\n",
"\n",
"open BioFSharp\n",
"open BioFSharp.Algorithm\n",
"open BioFSharp.Algorithm.PairwiseAlignment\n",
"\n",
"let aaScoring = ScoringMatrix.getScoringMatrixAminoAcid ScoringMatrix.ScoringMatrixAminoAcid.BLOSUM62\n",
"let nucScoring = ScoringMatrix.getScoringMatrixNucleotide ScoringMatrix.ScoringMatrixNucleotide.EDNA\n",
"\n",
"//For aminoacids\n",
"let costAA = {\n",
" Open = -5\n",
" Continuation = -1\n",
" Similarity = aaScoring \n",
" }\n",
"\n",
"//For nucleotides\n",
"let costN = {\n",
" Open = -5\n",
" Continuation = -1\n",
" Similarity = nucScoring \n",
" }\n",
"\n",
"let query1AA = \"NLFVAAAAQTKNGQGWVPSNYITPVNSAAA\" |> BioArray.ofAminoAcidSymbolString\n",
"let query2AA = \"NLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNS\" |> BioArray.ofAminoAcidSymbolString\n",
"\n",
"let localAA = \n",
" PairwiseAlignment.Local.SmithWaterman.align(query1AA,query2AA,costAA)\n",
"\n",
"let globalAA =\n",
" PairwiseAlignment.Global.NeedlemanWunsch.align(query1AA,query2AA,costAA)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "fsharp"
}
},
"outputs": [
{
"data": {
"text/html": [
"<table><thead><tr><th>Score</th><th>GapPenalty</th><th>ExtendGapPenalty</th><th>Length</th><th>Identity</th><th>IdentityFraction</th><th>Seq1AlignmentStartIndex</th><th>Seq2AlignmentStartIndex</th></tr></thead><tbody><tr><td><div class=\"dni-plaintext\">104</div></td><td><div class=\"dni-plaintext\">-5</div></td><td><div class=\"dni-plaintext\">-1</div></td><td><div class=\"dni-plaintext\">41</div></td><td><div class=\"dni-plaintext\">22</div></td><td><div class=\"dni-plaintext\">0.5365853658536586</div></td><td><div class=\"dni-plaintext\">27</div></td><td><div class=\"dni-plaintext\">58</div></td></tr></tbody></table>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"localAA.MetaData"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"dotnet_interactive": {
"language": "fsharp"
}
},
"outputs": [
{
"data": {
"text/html": [
"<table><thead><tr><th><i>index</i></th></tr></thead><tbody><tr><td>0</td></tr><tr><td>1</td></tr><tr><td>2</td></tr><tr><td>3</td></tr><tr><td>4</td></tr><tr><td>5</td></tr><tr><td>6</td></tr><tr><td>7</td></tr><tr><td>8</td></tr><tr><td>9</td></tr><tr><td>10</td></tr><tr><td>11</td></tr><tr><td>12</td></tr><tr><td>13</td></tr><tr><td>14</td></tr><tr><td>15</td></tr><tr><td>16</td></tr><tr><td>17</td></tr><tr><td>18</td></tr><tr><td>19</td></tr><tr><td colspan=\"1\"><i>... (more)</i></td></tr></tbody></table>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"let a = localAA.Sequences |> Seq.item 0\n",
"a\n"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
34 changes: 33 additions & 1 deletion src/BioFSharp/Playground.fsx
Expand Up @@ -39,4 +39,36 @@ open FSharpAux
open FSharpAux.IO
open FSharp.Stats

open BioFSharp
open BioFSharp

open BioFSharp
open BioFSharp.Algorithm
open BioFSharp.Algorithm.PairwiseAlignment

// Substitution-score lookups; each one is used as the `Similarity` field of
// an alignment cost record further down in this script.
let aaScoring =
    ScoringMatrix.ScoringMatrixAminoAcid.BLOSUM62
    |> ScoringMatrix.getScoringMatrixAminoAcid
let nucScoring =
    ScoringMatrix.ScoringMatrixNucleotide.EDNA
    |> ScoringMatrix.getScoringMatrixNucleotide

// Alignment costs for amino acid sequences.
// Gap opening and gap continuation are score contributions added to the
// alignment score, so penalties must be negative. `Open` is -5 (same as the
// nucleotide costs in this script); a positive value would reward opening gaps.
// `Similarity` scores a residue pair via the BLOSUM62 lookup in aaScoring.
let costAA = {
    Open = -5
    Continuation = -1
    Similarity = aaScoring
    }

// Alignment costs for nucleotide sequences: gap open -5, gap continuation -1,
// pair scores looked up through nucScoring.
let costN = { Open = -5; Continuation = -1; Similarity = nucScoring }

// Example amino acid symbol strings, converted with BioArray.ofAminoAcidSymbolString
// so they can be handed to the pairwise alignment functions.
let query1AA = BioArray.ofAminoAcidSymbolString "NLFVAAAAQTKNGQGWVPSNYITPVNSAAA"
let query2AA = BioArray.ofAminoAcidSymbolString "NLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNS"

// Local (Smith-Waterman) alignment of the two amino acid queries, scored with costAA.
let localAA =
PairwiseAlignment.Local.SmithWaterman.align(query1AA,query2AA,costAA)

// Global (Needleman-Wunsch) alignment of the same two queries, scored with costAA.
let globalAA =
PairwiseAlignment.Global.NeedlemanWunsch.align(query1AA,query2AA,costAA)

// Evaluate the Score field of the local alignment's metadata (interactive script output).
localAA.MetaData.Score
1 change: 1 addition & 0 deletions tests/BioFSharp.IO.Tests/BioFSharp.IO.Tests.fsproj
Expand Up @@ -7,6 +7,7 @@
</PropertyGroup>

<ItemGroup>
<Compile Include="ParserTests\PDB.fs" />
<Compile Include="ParserTests\Stride.fs" />
<Compile Include="ParserTests\DSSP.fs" />
<Compile Include="Main.fs" />
Expand Down

0 comments on commit 426dee6

Please sign in to comment.