### 1. General Imports and Language pragmas

In [1]:
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
import           "lens"        Control.Lens
import           "lens-aeson"  Data.Aeson.Lens   
import           "uniplate"    Data.Generics.Uniplate.Data (universeBi)
import qualified "bytestring"  Data.ByteString.Lazy.Char8 as B (readFile, take)

### 2. Import PowerQuery AST (Abstract Syntax Tree) & parsing libraries

In [2]:
import           "language-powerquery-ast" Language.PowerQuery.AST
import           "language-powerquery"     Language.PowerQuery
import           "pbix"                    Codec.Pbix.Types

### 3. Read a __.pbix__ file to a __ByteString__ in memory
A __.pbix__ file is an ordinary zip archive, so its first bytes are the __PK__ signature.

In [3]:
-- Read the .pbix file as a lazy ByteString; later cells refer to it as `bs`.
bs <- B.readFile "iris.pbix"
:t bs

-- Show only the first 30 bytes, enough to see the leading "PK" signature.
print (B.take 30 bs)

"PK\ETX\EOT\DC4\NUL\NUL\NUL\b\NUL\140\SUB\179N<\215s\201\n\NUL\NUL\NUL\b\NUL\NUL\NUL\a\NUL\FS\NUL"

### 4. Extract the __Section1.m__ formula from the __.pbix__
using optics from the __lens__ library

In [4]:
-- Compose the pbix, dataMashup and formula optics and read through them with
-- `view` (the prefix form of `^.`) to obtain the "Section1.m" formula.
:t view (pbix . dataMashup . formula "Section1.m") bs

view (pbix . dataMashup . formula "Section1.m") bs

Formula "section Section1;\r\n\r\nshared iris = let\r\n    Source = Csv.Document(File.Contents(\"\\\\VBOXSVR\\Shared\\PowerBI\\iris.csv\"),[Delimiter=\",\", Columns=5, Encoding=1252, QuoteStyle=QuoteStyle.None]),\r\n    #\"Promoted Headers\" = Table.PromoteHeaders(Source, [PromoteAllScalars=true]),\r\n    #\"Changed Type\" = Table.TransformColumnTypes(#\"Promoted Headers\",{{\"sepal_length\", type number}, {\"sepal_width\", type number}, {\"petal_length\", type number}, {\"petal_width\", type number}, {\"species\", type text}})\r\nin\r\n    #\"Changed Type\";"

### 5. Parse __Section1.m__ to an AST data structure
using optics from the __lens__ library

In [5]:
-- Same optic chain as for the raw formula, extended with `document`, which
-- yields the parsed AST instead of the formula text.
:t view (pbix . dataMashup . formula "Section1.m" . document) bs

view (pbix . dataMashup . formula "Section1.m" . document) bs

SectionDocument (Section {_section_attributes = Nothing, _section_name = Just (RegularI "Section1"), _section_members = Just [SectionMember {_sectionMember_attributes = Nothing, _sectionMember_shared = True, _sectionMember_name = RegularI "iris", _sectionMember_expression = LetE (LetExpression {_letExpression_variableList = [Variable {_variable_name = RegularI "Source", _variable_expression = LogicalE (And_OE (Is_LAE (As_IE (EqualityAE (RelationalEE (AdditiveRE (MultiplicativeAE (MetadataME (MetadataExpression {_metadataExpression_first = UnaryType (Primary_TE (InvokePE (InvokeExpression {_invokeExpression_primary = FieldAccessPE (ImplicitTargetProjection {_implicitTargetProjection_identifier = RegularI "Csv.Document", _implicitTargetProjection_annotation = Just Annotation}), _invokeExpression_argumentList = Just [LogicalE (And_OE (Is_LAE (As_IE (EqualityAE (RelationalEE (AdditiveRE (MultiplicativeAE (MetadataME (MetadataExpression {_metadataExpression_first = UnaryType (Primary_TE (In

### 6. Get all __String__ literals in __Section1.m__
- Using the __uniplate__ library
- The result contains every string literal in the query: the CSV file path, the delimiter, and all column names (the "feature" names) of the CSV

In [6]:
-- Parsed AST of the "Section1.m" formula.
let ast = view (pbix . dataMashup . formula "Section1.m" . document) bs

-- `literals` lists every Literal node that universeBi finds inside a document;
-- the fold below keeps only those matched by _StringL.
let literals = universeBi :: (Document Annotation -> [Literal])
toListOf (traversed . _StringL) (literals ast)

["\"\\\\VBOXSVR\\Shared\\PowerBI\\iris.csv\"","\",\"","\"sepal_length\"","\"sepal_width\"","\"petal_length\"","\"petal_width\"","\"species\""]

### 7. What kind of data source are we using?

In [7]:
let ast = bs ^. pbix . dataMashup . formula "Section1.m" . document

-- | Identify the function invoked by the query's @Source@ step.
--
-- Looks up the first variable named @Source@ in the document, takes the first
-- invoke expression found inside that variable, and returns the first
-- identifier found inside that invoke expression (ordering is whatever
-- 'universeBi' reports).
--
-- Fails with a descriptive 'error' naming the missing piece, rather than the
-- opaque @Prelude.head: empty list@, when the document has no @Source@
-- variable, when that variable contains no invoke expression, or when the
-- invoke expression contains no identifier.
getSource :: Document Annotation -> Identifier
getSource ast = ident
    where
        variables = universeBi :: (Document Annotation -> [Variable Annotation])
        variable  = firstOr "getSource: no variable named \"Source\" in the document"
                        [v | v <- variables ast, v ^. variable_name == RegularI "Source"]

        invokes = universeBi :: (Variable Annotation -> [InvokeExpression Annotation])
        invoke  = firstOr "getSource: the \"Source\" variable contains no invoke expression"
                        (invokes variable)

        idents  = universeBi :: (InvokeExpression Annotation -> [Identifier])
        ident   = firstOr "getSource: the invoke expression contains no identifier"
                        (idents invoke)

        -- First element of a list, or a failure that says what was missing.
        firstOr _   (x : _) = x
        firstOr msg []      = error msg

getSource ast

RegularI "Csv.Document"